[llvm] r298444 - AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 21 14:40:01 PDT 2017
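
For context, every hunk below applies the same one-line transformation: test entry points that previously used the default (unspecified) calling convention are marked with the amdgpu_kernel calling convention, so they are treated as kernel entry points rather than ordinary callable functions. A minimal before/after sketch of the pattern, using a hypothetical test function rather than one of the tests in this patch:

  ; Before: default calling convention.
  define void @example_kernel(i32 addrspace(1)* %out) #0 {
    store i32 0, i32 addrspace(1)* %out
    ret void
  }

  ; After: explicit kernel calling convention; arguments are lowered as
  ; kernel arguments instead of ordinary function arguments.
  define amdgpu_kernel void @example_kernel(i32 addrspace(1)* %out) #0 {
    store i32 0, i32 addrspace(1)* %out
    ret void
  }
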
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
-define void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
entry:
%ld = load i8, i8 addrspace(2)* %in
store i8 %ld, i8 addrspace(1)* %out
@@ -22,7 +22,7 @@ entry:
; GCN-HSA: flat_load_ushort v
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
@@ -33,7 +33,7 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
@@ -44,7 +44,7 @@ entry:
; GCN: s_load_dword s
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
@@ -55,7 +55,7 @@ entry:
; GCN: s_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
@@ -66,7 +66,7 @@ entry:
; GCN: s_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
@@ -78,7 +78,7 @@ entry:
; GCN-HSA: flat_load_ubyte
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -92,7 +92,7 @@ define void @constant_zextload_i8_to_i32
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%ld = load i8, i8 addrspace(2)* %in
%ext = sext i8 %ld to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -102,7 +102,7 @@ define void @constant_sextload_i8_to_i32
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -114,7 +114,7 @@ define void @constant_zextload_v1i8_to_v
; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -129,7 +129,7 @@ define void @constant_sextload_v1i8_to_v
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG: 8
-define void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -150,7 +150,7 @@ define void @constant_zextload_v2i8_to_v
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -170,7 +170,7 @@ define void @constant_sextload_v2i8_to_v
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
%ext = zext <3 x i8> %ld to <3 x i32>
@@ -193,7 +193,7 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
%ext = sext <3 x i8> %ld to <3 x i32>
@@ -214,7 +214,7 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -236,7 +236,7 @@ define void @constant_zextload_v4i8_to_v
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -264,7 +264,7 @@ define void @constant_sextload_v4i8_to_v
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -294,7 +294,7 @@ define void @constant_zextload_v8i8_to_v
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -335,7 +335,7 @@ define void @constant_sextload_v8i8_to_v
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -378,7 +378,7 @@ define void @constant_zextload_v16i8_to_
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -450,7 +450,7 @@ define void @constant_sextload_v16i8_to_
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -526,7 +526,7 @@ define void @constant_zextload_v32i8_to_
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -539,7 +539,7 @@ define void @constant_sextload_v32i8_to_
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(2)* %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -552,7 +552,7 @@ define void @constant_zextload_v64i8_to_
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(2)* %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -570,7 +570,7 @@ define void @constant_sextload_v64i8_to_
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -589,7 +589,7 @@ define void @constant_zextload_i8_to_i64
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -600,7 +600,7 @@ define void @constant_sextload_i8_to_i64
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -613,7 +613,7 @@ define void @constant_zextload_v1i8_to_v
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -623,7 +623,7 @@ define void @constant_sextload_v1i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -633,7 +633,7 @@ define void @constant_zextload_v2i8_to_v
; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -643,7 +643,7 @@ define void @constant_sextload_v2i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -653,7 +653,7 @@ define void @constant_zextload_v4i8_to_v
; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -663,7 +663,7 @@ define void @constant_sextload_v4i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -673,7 +673,7 @@ define void @constant_zextload_v8i8_to_v
; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -683,7 +683,7 @@ define void @constant_sextload_v8i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -693,7 +693,7 @@ define void @constant_zextload_v16i8_to_
; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -704,7 +704,7 @@ define void @constant_sextload_v16i8_to_
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -715,7 +715,7 @@ define void @constant_zextload_v32i8_to_
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -723,7 +723,7 @@ define void @constant_sextload_v32i8_to_
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
-; define void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
@@ -731,7 +731,7 @@ define void @constant_sextload_v32i8_to_
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
-; define void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
@@ -744,7 +744,7 @@ define void @constant_sextload_v32i8_to_
; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
-define void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
%ext = zext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
@@ -759,7 +759,7 @@ define void @constant_zextload_i8_to_i16
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
%a = load i8, i8 addrspace(2)* %in
%ext = sext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
@@ -767,7 +767,7 @@ define void @constant_sextload_i8_to_i16
}
; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
-define void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
@@ -778,7 +778,7 @@ define void @constant_zextload_v1i8_to_v
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(2)* %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
@@ -788,7 +788,7 @@ define void @constant_sextload_v1i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
@@ -800,7 +800,7 @@ define void @constant_zextload_v2i8_to_v
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(2)* %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
@@ -810,7 +810,7 @@ define void @constant_sextload_v2i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
@@ -824,7 +824,7 @@ define void @constant_zextload_v4i8_to_v
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(2)* %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
@@ -834,7 +834,7 @@ define void @constant_sextload_v4i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
@@ -853,7 +853,7 @@ define void @constant_zextload_v8i8_to_v
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(2)* %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
@@ -863,7 +863,7 @@ define void @constant_sextload_v8i8_to_v
; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
@@ -889,7 +889,7 @@ define void @constant_zextload_v16i8_to_
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(2)* %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
@@ -900,7 +900,7 @@ define void @constant_sextload_v16i8_to_
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
@@ -943,7 +943,7 @@ define void @constant_zextload_v32i8_to_
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(2)* %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
@@ -951,7 +951,7 @@ define void @constant_sextload_v32i8_to_
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
-; define void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
@@ -959,7 +959,7 @@ define void @constant_sextload_v32i8_to_
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
-; define void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; GCN-HSA: flat_load_dword
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-define void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
%tmp0 = load float, float addrspace(1)* %in
store float %tmp0, float addrspace(1)* %out
@@ -22,7 +22,7 @@ entry:
; GCN-HSA: flat_load_dwordx2
; R600: VTX_READ_64
-define void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in
store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
@@ -34,7 +34,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; R600: VTX_READ_128
-define void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <3 x float>, <3 x float> addrspace(1)* %in
store <3 x float> %tmp0, <3 x float> addrspace(1)* %out
@@ -46,7 +46,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; R600: VTX_READ_128
-define void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <4 x float>, <4 x float> addrspace(1)* %in
store <4 x float> %tmp0, <4 x float> addrspace(1)* %out
@@ -61,7 +61,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
-define void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in
store <8 x float> %tmp0, <8 x float> addrspace(1)* %out
@@ -83,7 +83,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-define void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
entry:
%tmp0 = load <16 x float>, <16 x float> addrspace(1)* %in
store <16 x float> %tmp0, <16 x float> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
-define void @global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%ld = load double, double addrspace(1)* %in
store double %ld, double addrspace(1)* %out
ret void
@@ -17,7 +17,7 @@ define void @global_load_f64(double addr
; FUNC-LABEL: {{^}}global_load_v2f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @global_load_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
entry:
%ld = load <2 x double>, <2 x double> addrspace(1)* %in
store <2 x double> %ld, <2 x double> addrspace(1)* %out
@@ -29,7 +29,7 @@ entry:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
entry:
%ld = load <3 x double>, <3 x double> addrspace(1)* %in
store <3 x double> %ld, <3 x double> addrspace(1)* %out
@@ -42,7 +42,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
entry:
%ld = load <4 x double>, <4 x double> addrspace(1)* %in
store <4 x double> %ld, <4 x double> addrspace(1)* %out
@@ -59,7 +59,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
entry:
%ld = load <8 x double>, <8 x double> addrspace(1)* %in
store <8 x double> %ld, <8 x double> addrspace(1)* %out
@@ -84,7 +84,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
entry:
%ld = load <16 x double>, <16 x double> addrspace(1)* %in
store <16 x double> %ld, <16 x double> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll Tue Mar 21 16:39:51 2017
@@ -9,56 +9,56 @@
; EG: VTX_READ_8
; EG: AND_INT
-define void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%load = load i1, i1 addrspace(1)* %in
store i1 %load, i1 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v2i1:
-define void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(1)* %in
store <2 x i1> %load, <2 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v3i1:
-define void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(1)* %in
store <3 x i1> %load, <3 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v4i1:
-define void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(1)* %in
store <4 x i1> %load, <4 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v8i1:
-define void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(1)* %in
store <8 x i1> %load, <8 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v16i1:
-define void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(1)* %in
store <16 x i1> %load, <16 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v32i1:
-define void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(1)* %in
store <32 x i1> %load, <32 x i1> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v64i1:
-define void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(1)* %in
store <64 x i1> %load, <64 x i1> addrspace(1)* %out
ret void
@@ -67,7 +67,7 @@ define void @global_load_v64i1(<64 x i1>
; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
-define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%a = load i1, i1 addrspace(1)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -81,7 +81,7 @@ define void @global_zextload_i1_to_i32(i
; EG: VTX_READ_8
; EG: BFE_INT
-define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%a = load i1, i1 addrspace(1)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -89,7 +89,7 @@ define void @global_sextload_i1_to_i32(i
}
; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
-define void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -97,7 +97,7 @@ define void @global_zextload_v1i1_to_v1i
}
; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
-define void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -105,7 +105,7 @@ define void @global_sextload_v1i1_to_v1i
}
; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
-define void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -113,7 +113,7 @@ define void @global_zextload_v2i1_to_v2i
}
; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
-define void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -121,7 +121,7 @@ define void @global_sextload_v2i1_to_v2i
}
; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
-define void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(1)* %in
%ext = zext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
@@ -129,7 +129,7 @@ define void @global_zextload_v3i1_to_v3i
}
; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
-define void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(1)* %in
%ext = sext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
@@ -137,7 +137,7 @@ define void @global_sextload_v3i1_to_v3i
}
; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
-define void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -145,7 +145,7 @@ define void @global_zextload_v4i1_to_v4i
}
; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
-define void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -153,7 +153,7 @@ define void @global_sextload_v4i1_to_v4i
}
; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
-define void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -161,7 +161,7 @@ define void @global_zextload_v8i1_to_v8i
}
; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
-define void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -169,7 +169,7 @@ define void @global_sextload_v8i1_to_v8i
}
; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
-define void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -177,7 +177,7 @@ define void @global_zextload_v16i1_to_v1
}
; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
-define void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -185,7 +185,7 @@ define void @global_sextload_v16i1_to_v1
}
; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
-define void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(1)* %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -193,7 +193,7 @@ define void @global_zextload_v32i1_to_v3
}
; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
-define void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(1)* %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -201,7 +201,7 @@ define void @global_sextload_v32i1_to_v3
}
; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
-define void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(1)* %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -209,7 +209,7 @@ define void @global_zextload_v64i1_to_v6
}
; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
-define void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(1)* %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -221,7 +221,7 @@ define void @global_sextload_v64i1_to_v6
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
; GCN: buffer_store_dwordx2
-define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%a = load i1, i1 addrspace(1)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -233,7 +233,7 @@ define void @global_zextload_i1_to_i64(i
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: buffer_store_dwordx2
-define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%a = load i1, i1 addrspace(1)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -241,7 +241,7 @@ define void @global_sextload_i1_to_i64(i
}
; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
-define void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -249,7 +249,7 @@ define void @global_zextload_v1i1_to_v1i
}
; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
-define void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -257,7 +257,7 @@ define void @global_sextload_v1i1_to_v1i
}
; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
-define void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -265,7 +265,7 @@ define void @global_zextload_v2i1_to_v2i
}
; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
-define void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -273,7 +273,7 @@ define void @global_sextload_v2i1_to_v2i
}
; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
-define void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(1)* %in
%ext = zext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
@@ -281,7 +281,7 @@ define void @global_zextload_v3i1_to_v3i
}
; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
-define void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(1)* %in
%ext = sext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
@@ -289,7 +289,7 @@ define void @global_sextload_v3i1_to_v3i
}
; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
-define void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -297,7 +297,7 @@ define void @global_zextload_v4i1_to_v4i
}
; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
-define void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -305,7 +305,7 @@ define void @global_sextload_v4i1_to_v4i
}
; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
-define void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -313,7 +313,7 @@ define void @global_zextload_v8i1_to_v8i
}
; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
-define void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -321,7 +321,7 @@ define void @global_sextload_v8i1_to_v8i
}
; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
-define void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -329,7 +329,7 @@ define void @global_zextload_v16i1_to_v1
}
; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
-define void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -337,7 +337,7 @@ define void @global_sextload_v16i1_to_v1
}
; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
-define void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(1)* %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -345,7 +345,7 @@ define void @global_zextload_v32i1_to_v3
}
; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
-define void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(1)* %in
%ext = sext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -353,7 +353,7 @@ define void @global_sextload_v32i1_to_v3
}
; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
-define void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(1)* %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
@@ -361,7 +361,7 @@ define void @global_zextload_v64i1_to_v6
}
; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
-define void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(1)* %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN-HSA: flat_load_ushort
; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
%ld = load i16, i16 addrspace(1)* %in
store i16 %ld, i16 addrspace(1)* %out
@@ -23,7 +23,7 @@ entry:
; GCN-HSA: flat_load_dword v
; EGCM: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
%ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
@@ -36,7 +36,7 @@ entry:
; EGCM-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
-define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
@@ -48,7 +48,7 @@ entry:
; GCN-HSA: flat_load_dwordx2
; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
%ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
@@ -60,7 +60,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
entry:
%ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
@@ -76,7 +76,7 @@ entry:
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
entry:
%ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
@@ -91,7 +91,7 @@ entry:
; GCN-HSA: flat_store_dword
; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -108,7 +108,7 @@ define void @global_zextload_i16_to_i32(
; EGCM: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EGCM: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EGCM: 16
-define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%a = load i16, i16 addrspace(1)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -120,7 +120,7 @@ define void @global_sextload_i16_to_i32(
; GCN-HSA: flat_load_ushort
; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -134,7 +134,7 @@ define void @global_zextload_v1i16_to_v1
; EGCM: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EGCM: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EGCM: 16
-define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -148,7 +148,7 @@ define void @global_sextload_v1i16_to_v1
; EGCM: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EGCM: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
; EGCM: 16
-define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -168,7 +168,7 @@ define void @global_zextload_v2i16_to_v2
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV.[XYZW]}}, 0.0, literal
; EGCM-DAG: 16
; EGCM-DAG: 16
-define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -190,7 +190,7 @@ define void @global_sextload_v2i16_to_v2
; EGCM: 16
; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal
-define void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
%ext = zext <3 x i16> %ld to <3 x i32>
@@ -214,7 +214,7 @@ entry:
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], 0.0, literal
; EGCM-DAG: 16
; EGCM-DAG: 16
-define void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
%ext = sext <3 x i16> %ld to <3 x i32>
@@ -237,7 +237,7 @@ entry:
; EGCM-DAG: AND_INT {{[* ]*}}[[ST]].X, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{.*}}, literal
; EGCM-DAG: 16
-define void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -262,7 +262,7 @@ define void @global_zextload_v4i16_to_v4
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
-define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -296,7 +296,7 @@ define void @global_sextload_v4i16_to_v4
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
-define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -330,7 +330,7 @@ define void @global_zextload_v8i16_to_v8
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
-define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -346,7 +346,7 @@ define void @global_sextload_v8i16_to_v8
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
-define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -357,7 +357,7 @@ define void @global_zextload_v16i16_to_v
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
-define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -379,7 +379,7 @@ define void @global_sextload_v16i16_to_v
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1
-define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -401,7 +401,7 @@ define void @global_zextload_v32i16_to_v
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1
-define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -435,7 +435,7 @@ define void @global_sextload_v32i16_to_v
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1
-define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -452,7 +452,7 @@ define void @global_zextload_v64i16_to_v
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1
-define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -469,7 +469,7 @@ define void @global_sextload_v64i16_to_v
; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM: MOV {{.*}}, 0.0
-define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -495,7 +495,7 @@ define void @global_zextload_i16_to_i64(
; EGCM: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EGCM: 31
-define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%a = load i16, i16 addrspace(1)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -506,7 +506,7 @@ define void @global_sextload_i16_to_i64(
; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM: MOV {{.*}}, 0.0
-define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -519,7 +519,7 @@ define void @global_zextload_v1i16_to_v1
; EGCM: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EGCM: 31
-define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -527,7 +527,7 @@ define void @global_sextload_v1i16_to_v1
}
; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
-define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -537,7 +537,7 @@ define void @global_zextload_v2i16_to_v2
; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:
; EGCM: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -547,7 +547,7 @@ define void @global_sextload_v2i16_to_v2
; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:
; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -557,7 +557,7 @@ define void @global_zextload_v4i16_to_v4
; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:
; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -567,7 +567,7 @@ define void @global_sextload_v4i16_to_v4
; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:
; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -577,7 +577,7 @@ define void @global_zextload_v8i16_to_v8
; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:
; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -588,7 +588,7 @@ define void @global_sextload_v8i16_to_v8
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -599,7 +599,7 @@ define void @global_zextload_v16i16_to_v
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -612,7 +612,7 @@ define void @global_sextload_v16i16_to_v
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
-define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -625,7 +625,7 @@ define void @global_zextload_v32i16_to_v
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
-define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -633,7 +633,7 @@ define void @global_sextload_v32i16_to_v
}
; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
-; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
@@ -641,7 +641,7 @@ define void @global_sextload_v32i16_to_v
; }
; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
-; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN-HSA: flat_load_dword
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-define void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
%ld = load i32, i32 addrspace(1)* %in
store i32 %ld, i32 addrspace(1)* %out
@@ -21,7 +21,7 @@ entry:
; GCN-HSA: flat_load_dwordx2
; EG: VTX_READ_64
-define void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
entry:
%ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
@@ -33,7 +33,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; EG: VTX_READ_128
-define void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
entry:
%ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
@@ -45,7 +45,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; EG: VTX_READ_128
-define void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
entry:
%ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
@@ -60,7 +60,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
entry:
%ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
@@ -82,7 +82,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
entry:
%ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
@@ -98,7 +98,7 @@ entry:
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
-define void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%ld = load i32, i32 addrspace(1)* %in
%ext = zext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -117,7 +117,7 @@ define void @global_zextload_i32_to_i64(
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
-define void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%ld = load i32, i32 addrspace(1)* %in
%ext = sext i32 %ld to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -130,7 +130,7 @@ define void @global_sextload_i32_to_i64(
; GCN-HSA: flat_load_dword
; GCN-HSA: flat_store_dwordx2
-define void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = zext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -143,7 +143,7 @@ define void @global_zextload_v1i32_to_v1
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = sext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -156,7 +156,7 @@ define void @global_sextload_v1i32_to_v1
; GCN-HSA: flat_load_dwordx2
; GCN-HSA: flat_store_dwordx4
-define void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
%ext = zext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -172,7 +172,7 @@ define void @global_zextload_v2i32_to_v2
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
%ext = sext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -187,7 +187,7 @@ define void @global_sextload_v2i32_to_v2
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
-define void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
%ext = zext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -208,7 +208,7 @@ define void @global_zextload_v4i32_to_v4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
%ext = sext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -231,7 +231,7 @@ define void @global_sextload_v4i32_to_v4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-SA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
%ext = zext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -263,7 +263,7 @@ define void @global_zextload_v8i32_to_v8
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
%ext = sext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -309,7 +309,7 @@ define void @global_sextload_v8i32_to_v8
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
%ext = sext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -344,7 +344,7 @@ define void @global_sextload_v16i32_to_v
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
-define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
%ext = zext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -444,7 +444,7 @@ define void @global_zextload_v16i32_to_v
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
-define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
%ext = sext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -511,7 +511,7 @@ define void @global_sextload_v32i32_to_v
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
-define void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
%ext = zext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
; EG: VTX_READ_64
-define void @global_load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%ld = load i64, i64 addrspace(1)* %in
store i64 %ld, i64 addrspace(1)* %out
ret void
@@ -24,7 +24,7 @@ define void @global_load_i64(i64 addrspa
; GCN-HSA: flat_load_dwordx4
; EG: VTX_READ_128
-define void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
entry:
%ld = load <2 x i64>, <2 x i64> addrspace(1)* %in
store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
@@ -40,7 +40,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define void @global_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %in) #0 {
entry:
%ld = load <3 x i64>, <3 x i64> addrspace(1)* %in
store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
@@ -56,7 +56,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define void @global_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
entry:
%ld = load <4 x i64>, <4 x i64> addrspace(1)* %in
store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
@@ -78,7 +78,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define void @global_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %in) #0 {
entry:
%ld = load <8 x i64>, <8 x i64> addrspace(1)* %in
store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
@@ -112,7 +112,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define void @global_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %in) #0 {
entry:
%ld = load <16 x i64>, <16 x i64> addrspace(1)* %in
store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
-define void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
entry:
%ld = load i8, i8 addrspace(1)* %in
store i8 %ld, i8 addrspace(1)* %out
@@ -23,7 +23,7 @@ entry:
; GCN-HSA: flat_load_ushort v
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
@@ -35,7 +35,7 @@ entry:
; GCN-HSA: flat_load_dword v
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
@@ -47,7 +47,7 @@ entry:
; GCN-HSA: flat_load_dword v
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
@@ -59,7 +59,7 @@ entry:
; GCN-HSA: flat_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <8 x i8>, <8 x i8> addrspace(1)* %in
store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
@@ -72,7 +72,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <16 x i8>, <16 x i8> addrspace(1)* %in
store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
@@ -84,7 +84,7 @@ entry:
; GCN-HSA: flat_load_ubyte
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -98,7 +98,7 @@ define void @global_zextload_i8_to_i32(i
; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%ld = load i8, i8 addrspace(1)* %in
%ext = sext i8 %ld to i32
store i32 %ext, i32 addrspace(1)* %out
@@ -108,7 +108,7 @@ define void @global_sextload_i8_to_i32(i
; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -120,7 +120,7 @@ define void @global_zextload_v1i8_to_v1i
; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
@@ -135,7 +135,7 @@ define void @global_sextload_v1i8_to_v1i
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: 8
-define void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -152,7 +152,7 @@ define void @global_zextload_v2i8_to_v2i
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
-define void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
@@ -174,7 +174,7 @@ define void @global_sextload_v2i8_to_v2i
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
-define void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
%ext = zext <3 x i8> %ld to <3 x i32>
@@ -207,7 +207,7 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
%ext = sext <3 x i8> %ld to <3 x i32>
@@ -227,7 +227,7 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -248,7 +248,7 @@ define void @global_zextload_v4i8_to_v4i
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
@@ -273,7 +273,7 @@ define void @global_sextload_v4i8_to_v4i
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -300,7 +300,7 @@ define void @global_zextload_v8i8_to_v8i
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
@@ -341,7 +341,7 @@ define void @global_sextload_v8i8_to_v8i
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -384,7 +384,7 @@ define void @global_zextload_v16i8_to_v1
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
@@ -456,7 +456,7 @@ define void @global_sextload_v16i8_to_v1
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(1)* %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -532,7 +532,7 @@ define void @global_zextload_v32i8_to_v3
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(1)* %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
@@ -545,7 +545,7 @@ define void @global_sextload_v32i8_to_v3
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(1)* %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -558,7 +558,7 @@ define void @global_zextload_v64i8_to_v6
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(1)* %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
@@ -576,7 +576,7 @@ define void @global_sextload_v64i8_to_v6
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -595,7 +595,7 @@ define void @global_zextload_i8_to_i64(i
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%a = load i8, i8 addrspace(1)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
@@ -606,7 +606,7 @@ define void @global_sextload_i8_to_i64(i
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -619,7 +619,7 @@ define void @global_zextload_v1i8_to_v1i
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
@@ -629,7 +629,7 @@ define void @global_sextload_v1i8_to_v1i
; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -639,7 +639,7 @@ define void @global_zextload_v2i8_to_v2i
; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
@@ -649,7 +649,7 @@ define void @global_sextload_v2i8_to_v2i
; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -659,7 +659,7 @@ define void @global_zextload_v4i8_to_v4i
; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
@@ -669,7 +669,7 @@ define void @global_sextload_v4i8_to_v4i
; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -679,7 +679,7 @@ define void @global_zextload_v8i8_to_v8i
; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
@@ -689,7 +689,7 @@ define void @global_sextload_v8i8_to_v8i
; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -699,7 +699,7 @@ define void @global_zextload_v16i8_to_v1
; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
@@ -710,7 +710,7 @@ define void @global_sextload_v16i8_to_v1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(1)* %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -721,7 +721,7 @@ define void @global_zextload_v32i8_to_v3
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(1)* %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
@@ -729,7 +729,7 @@ define void @global_sextload_v32i8_to_v3
}
; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
-; define void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
@@ -737,7 +737,7 @@ define void @global_sextload_v32i8_to_v3
; }
; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
-; define void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
@@ -752,7 +752,7 @@ define void @global_sextload_v32i8_to_v3
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
@@ -768,7 +768,7 @@ define void @global_zextload_i8_to_i16(i
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%a = load i8, i8 addrspace(1)* %in
%ext = sext i8 %a to i16
store i16 %ext, i16 addrspace(1)* %out
@@ -778,7 +778,7 @@ define void @global_sextload_i8_to_i16(i
; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
@@ -789,7 +789,7 @@ define void @global_zextload_v1i8_to_v1i
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
@@ -799,7 +799,7 @@ define void @global_sextload_v1i8_to_v1i
; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
@@ -811,7 +811,7 @@ define void @global_zextload_v2i8_to_v2i
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
@@ -821,7 +821,7 @@ define void @global_sextload_v2i8_to_v2i
; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
@@ -835,7 +835,7 @@ define void @global_zextload_v4i8_to_v4i
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
@@ -845,7 +845,7 @@ define void @global_sextload_v4i8_to_v4i
; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
@@ -863,7 +863,7 @@ define void @global_zextload_v8i8_to_v8i
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
@@ -873,7 +873,7 @@ define void @global_sextload_v8i8_to_v8i
; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
@@ -899,7 +899,7 @@ define void @global_zextload_v16i8_to_v1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
@@ -910,7 +910,7 @@ define void @global_sextload_v16i8_to_v1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(1)* %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
@@ -953,7 +953,7 @@ define void @global_zextload_v32i8_to_v3
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(1)* %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
@@ -961,7 +961,7 @@ define void @global_sextload_v32i8_to_v3
}
; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
-; define void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
@@ -969,7 +969,7 @@ define void @global_sextload_v32i8_to_v3
; }
; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
-; define void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 {
+define amdgpu_kernel void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 {
entry:
%tmp0 = load float, float addrspace(3)* %in
store float %tmp0, float addrspace(1)* %out
@@ -20,7 +20,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 {
entry:
%tmp0 = load <2 x float>, <2 x float> addrspace(3)* %in
store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
@@ -38,7 +38,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 {
entry:
%tmp0 = load <3 x float>, <3 x float> addrspace(3)* %in
store <3 x float> %tmp0, <3 x float> addrspace(3)* %out
@@ -52,7 +52,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 {
entry:
%tmp0 = load <4 x float>, <4 x float> addrspace(3)* %in
store <4 x float> %tmp0, <4 x float> addrspace(3)* %out
@@ -71,7 +71,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 {
entry:
%tmp0 = load <8 x float>, <8 x float> addrspace(3)* %in
store <8 x float> %tmp0, <8 x float> addrspace(3)* %out
@@ -100,7 +100,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 {
entry:
%tmp0 = load <16 x float>, <16 x float> addrspace(3)* %in
store <16 x float> %tmp0, <16 x float> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
%ld = load double, double addrspace(3)* %in
store double %ld, double addrspace(3)* %out
ret void
@@ -22,7 +22,7 @@ define void @local_load_f64(double addrs
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
entry:
%ld = load <2 x double>, <2 x double> addrspace(3)* %in
store <2 x double> %ld, <2 x double> addrspace(3)* %out
@@ -39,7 +39,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
entry:
%ld = load <3 x double>, <3 x double> addrspace(3)* %in
store <3 x double> %ld, <3 x double> addrspace(3)* %out
@@ -59,7 +59,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
entry:
%ld = load <4 x double>, <4 x double> addrspace(3)* %in
store <4 x double> %ld, <4 x double> addrspace(3)* %out
@@ -88,7 +88,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
entry:
%ld = load <8 x double>, <8 x double> addrspace(3)* %in
store <8 x double> %ld, <8 x double> addrspace(3)* %out
@@ -144,7 +144,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
entry:
%ld = load <16 x double>, <16 x double> addrspace(3)* %in
store <16 x double> %ld, <16 x double> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll Tue Mar 21 16:39:51 2017
@@ -10,56 +10,56 @@
; EG: LDS_UBYTE_READ_RET
; EG: AND_INT
; EG: LDS_BYTE_WRITE
-define void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
%load = load i1, i1 addrspace(3)* %in
store i1 %load, i1 addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v2i1:
-define void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(3)* %in
store <2 x i1> %load, <2 x i1> addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v3i1:
-define void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(3)* %in
store <3 x i1> %load, <3 x i1> addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v4i1:
-define void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(3)* %in
store <4 x i1> %load, <4 x i1> addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v8i1:
-define void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(3)* %in
store <8 x i1> %load, <8 x i1> addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v16i1:
-define void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(3)* %in
store <16 x i1> %load, <16 x i1> addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v32i1:
-define void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(3)* %in
store <32 x i1> %load, <32 x i1> addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_load_v64i1:
-define void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(3)* %in
store <64 x i1> %load, <64 x i1> addrspace(3)* %out
ret void
@@ -68,7 +68,7 @@ define void @local_load_v64i1(<64 x i1>
; FUNC-LABEL: {{^}}local_zextload_i1_to_i32:
; GCN: ds_read_u8
; GCN: ds_write_b32
-define void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
%a = load i1, i1 addrspace(3)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(3)* %out
@@ -82,7 +82,7 @@ define void @local_zextload_i1_to_i32(i3
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
-define void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
%a = load i1, i1 addrspace(3)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(3)* %out
@@ -90,7 +90,7 @@ define void @local_sextload_i1_to_i32(i3
}
; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32:
-define void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(3)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
@@ -98,7 +98,7 @@ define void @local_zextload_v1i1_to_v1i3
}
; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32:
-define void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(3)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
@@ -106,7 +106,7 @@ define void @local_sextload_v1i1_to_v1i3
}
; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32:
-define void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(3)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
@@ -114,7 +114,7 @@ define void @local_zextload_v2i1_to_v2i3
}
; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32:
-define void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(3)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
@@ -122,7 +122,7 @@ define void @local_sextload_v2i1_to_v2i3
}
; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32:
-define void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(3)* %in
%ext = zext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
@@ -130,7 +130,7 @@ define void @local_zextload_v3i1_to_v3i3
}
; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32:
-define void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(3)* %in
%ext = sext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
@@ -138,7 +138,7 @@ define void @local_sextload_v3i1_to_v3i3
}
; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32:
-define void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(3)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
@@ -146,7 +146,7 @@ define void @local_zextload_v4i1_to_v4i3
}
; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32:
-define void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(3)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
@@ -154,7 +154,7 @@ define void @local_sextload_v4i1_to_v4i3
}
; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32:
-define void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(3)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
@@ -162,7 +162,7 @@ define void @local_zextload_v8i1_to_v8i3
}
; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32:
-define void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(3)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
@@ -170,7 +170,7 @@ define void @local_sextload_v8i1_to_v8i3
}
; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32:
-define void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(3)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
@@ -178,7 +178,7 @@ define void @local_zextload_v16i1_to_v16
}
; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32:
-define void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(3)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
@@ -186,7 +186,7 @@ define void @local_sextload_v16i1_to_v16
}
; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32:
-define void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(3)* %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
@@ -194,7 +194,7 @@ define void @local_zextload_v32i1_to_v32
}
; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32:
-define void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(3)* %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
@@ -202,7 +202,7 @@ define void @local_sextload_v32i1_to_v32
}
; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32:
-define void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(3)* %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
@@ -210,7 +210,7 @@ define void @local_zextload_v64i1_to_v64
}
; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32:
-define void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(3)* %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
@@ -221,7 +221,7 @@ define void @local_sextload_v64i1_to_v64
; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN: ds_write_b64
-define void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
%a = load i1, i1 addrspace(3)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -233,7 +233,7 @@ define void @local_zextload_i1_to_i64(i6
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: ds_write_b64
-define void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
%a = load i1, i1 addrspace(3)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -241,7 +241,7 @@ define void @local_sextload_i1_to_i64(i6
}
; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64:
-define void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(3)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -249,7 +249,7 @@ define void @local_zextload_v1i1_to_v1i6
}
; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64:
-define void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
%load = load <1 x i1>, <1 x i1> addrspace(3)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -257,7 +257,7 @@ define void @local_sextload_v1i1_to_v1i6
}
; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64:
-define void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(3)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -265,7 +265,7 @@ define void @local_zextload_v2i1_to_v2i6
}
; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64:
-define void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
%load = load <2 x i1>, <2 x i1> addrspace(3)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -273,7 +273,7 @@ define void @local_sextload_v2i1_to_v2i6
}
; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64:
-define void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(3)* %in
%ext = zext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
@@ -281,7 +281,7 @@ define void @local_zextload_v3i1_to_v3i6
}
; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64:
-define void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
%load = load <3 x i1>, <3 x i1> addrspace(3)* %in
%ext = sext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
@@ -289,7 +289,7 @@ define void @local_sextload_v3i1_to_v3i6
}
; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64:
-define void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(3)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -297,7 +297,7 @@ define void @local_zextload_v4i1_to_v4i6
}
; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64:
-define void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
%load = load <4 x i1>, <4 x i1> addrspace(3)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -305,7 +305,7 @@ define void @local_sextload_v4i1_to_v4i6
}
; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64:
-define void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(3)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -313,7 +313,7 @@ define void @local_zextload_v8i1_to_v8i6
}
; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64:
-define void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
%load = load <8 x i1>, <8 x i1> addrspace(3)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -321,7 +321,7 @@ define void @local_sextload_v8i1_to_v8i6
}
; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64:
-define void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(3)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -329,7 +329,7 @@ define void @local_zextload_v16i1_to_v16
}
; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64:
-define void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
%load = load <16 x i1>, <16 x i1> addrspace(3)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -337,7 +337,7 @@ define void @local_sextload_v16i1_to_v16
}
; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64:
-define void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(3)* %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -345,7 +345,7 @@ define void @local_zextload_v32i1_to_v32
}
; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64:
-define void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
%load = load <32 x i1>, <32 x i1> addrspace(3)* %in
%ext = sext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -353,7 +353,7 @@ define void @local_sextload_v32i1_to_v32
}
; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64:
-define void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(3)* %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
@@ -361,7 +361,7 @@ define void @local_zextload_v64i1_to_v64
}
; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64:
-define void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
%load = load <64 x i1>, <64 x i1> addrspace(3)* %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
+define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
entry:
%ld = load i16, i16 addrspace(3)* %in
store i16 %ld, i16 addrspace(3)* %out
@@ -25,7 +25,7 @@ entry:
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
entry:
%ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
@@ -39,7 +39,7 @@ entry:
; EG-DAG: LDS_USHORT_READ_RET
; EG-DAG: LDS_READ_RET
-define void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
@@ -51,7 +51,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
entry:
%ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
@@ -65,7 +65,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
entry:
%ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
@@ -86,7 +86,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
entry:
%ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
@@ -102,7 +102,7 @@ entry:
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
%a = load i16, i16 addrspace(3)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(3)* %out
@@ -121,7 +121,7 @@ define void @local_zextload_i16_to_i32(i
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
%a = load i16, i16 addrspace(3)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(3)* %out
@@ -136,7 +136,7 @@ define void @local_sextload_i16_to_i32(i
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(3)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
@@ -153,7 +153,7 @@ define void @local_zextload_v1i16_to_v1i
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(3)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
@@ -166,7 +166,7 @@ define void @local_sextload_v1i16_to_v1i
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(3)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
@@ -181,7 +181,7 @@ define void @local_zextload_v2i16_to_v2i
; EG: LDS_READ_RET
; EG: BFE_INT
; EG: BFE_INT
-define void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(3)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
@@ -194,7 +194,7 @@ define void @local_sextload_v2i16_to_v2i
; GCN-DAG: ds_write_b64
; EG: LDS_READ_RET
-define void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
%ext = zext <3 x i16> %ld to <3 x i32>
@@ -211,7 +211,7 @@ entry:
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
%ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
%ext = sext <3 x i16> %ld to <3 x i32>
@@ -226,7 +226,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(3)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
@@ -244,7 +244,7 @@ define void @local_local_zextload_v4i16_
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(3)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
@@ -258,7 +258,7 @@ define void @local_sextload_v4i16_to_v4i
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(3)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
@@ -280,7 +280,7 @@ define void @local_zextload_v8i16_to_v8i
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(3)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
@@ -304,7 +304,7 @@ define void @local_sextload_v8i16_to_v8i
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(3)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
@@ -340,7 +340,7 @@ define void @local_zextload_v16i16_to_v1
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(3)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
@@ -369,7 +369,7 @@ define void @local_sextload_v16i16_to_v1
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(3)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
@@ -406,7 +406,7 @@ define void @local_zextload_v32i16_to_v3
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(3)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
@@ -471,7 +471,7 @@ define void @local_sextload_v32i16_to_v3
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(3)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
@@ -512,7 +512,7 @@ define void @local_zextload_v64i16_to_v6
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
%load = load <64 x i16>, <64 x i16> addrspace(3)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
@@ -531,7 +531,7 @@ define void @local_sextload_v64i16_to_v6
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
%a = load i16, i16 addrspace(3)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -558,7 +558,7 @@ define void @local_zextload_i16_to_i64(i
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
%a = load i16, i16 addrspace(3)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -573,7 +573,7 @@ define void @local_sextload_i16_to_i64(i
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(3)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -590,7 +590,7 @@ define void @local_zextload_v1i16_to_v1i
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
%load = load <1 x i16>, <1 x i16> addrspace(3)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -600,7 +600,7 @@ define void @local_sextload_v1i16_to_v1i
; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
; EG: LDS_READ_RET
-define void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(3)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -612,7 +612,7 @@ define void @local_zextload_v2i16_to_v2i
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: ASHR
-define void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
%load = load <2 x i16>, <2 x i16> addrspace(3)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -623,7 +623,7 @@ define void @local_sextload_v2i16_to_v2i
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(3)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -638,7 +638,7 @@ define void @local_zextload_v4i16_to_v4i
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
%load = load <4 x i16>, <4 x i16> addrspace(3)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -651,7 +651,7 @@ define void @local_sextload_v4i16_to_v4i
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(3)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -672,7 +672,7 @@ define void @local_zextload_v8i16_to_v8i
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
%load = load <8 x i16>, <8 x i16> addrspace(3)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -689,7 +689,7 @@ define void @local_sextload_v8i16_to_v8i
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(3)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -722,7 +722,7 @@ define void @local_zextload_v16i16_to_v1
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
%load = load <16 x i16>, <16 x i16> addrspace(3)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -747,7 +747,7 @@ define void @local_sextload_v16i16_to_v1
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(3)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -804,7 +804,7 @@ define void @local_zextload_v32i16_to_v3
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
%load = load <32 x i16>, <32 x i16> addrspace(3)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -812,7 +812,7 @@ define void @local_sextload_v32i16_to_v3
}
; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
-; define void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
@@ -820,7 +820,7 @@ define void @local_sextload_v32i16_to_v3
; }
; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
-; define void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
entry:
%ld = load i32, i32 addrspace(3)* %in
store i32 %ld, i32 addrspace(3)* %out
@@ -18,7 +18,7 @@ entry:
; FUNC-LABEL: {{^}}local_load_v2i32:
; GCN: ds_read_b64
-define void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
entry:
%ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
@@ -28,7 +28,7 @@ entry:
; FUNC-LABEL: {{^}}local_load_v3i32:
; GCN-DAG: ds_read_b64
; GCN-DAG: ds_read_b32
-define void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
entry:
%ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
@@ -38,7 +38,7 @@ entry:
; FUNC-LABEL: {{^}}local_load_v4i32:
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
-define void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
entry:
%ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
@@ -48,7 +48,7 @@ entry:
; FUNC-LABEL: {{^}}local_load_v8i32:
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
-define void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
entry:
%ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
@@ -64,7 +64,7 @@ entry:
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
-define void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
%ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
@@ -72,7 +72,7 @@ entry:
}
; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
-define void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
%ld = load i32, i32 addrspace(3)* %in
%ext = zext i32 %ld to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -80,7 +80,7 @@ define void @local_zextload_i32_to_i64(i
}
; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
-define void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
%ld = load i32, i32 addrspace(3)* %in
%ext = sext i32 %ld to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -88,7 +88,7 @@ define void @local_sextload_i32_to_i64(i
}
; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
-define void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
%ext = zext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -96,7 +96,7 @@ define void @local_zextload_v1i32_to_v1i
}
; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
-define void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
%ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
%ext = sext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -104,7 +104,7 @@ define void @local_sextload_v1i32_to_v1i
}
; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
-define void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
%ext = zext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -112,7 +112,7 @@ define void @local_zextload_v2i32_to_v2i
}
; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
-define void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
%ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
%ext = sext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -120,7 +120,7 @@ define void @local_sextload_v2i32_to_v2i
}
; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
-define void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
%ext = zext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -128,7 +128,7 @@ define void @local_zextload_v4i32_to_v4i
}
; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
-define void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
%ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
%ext = sext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -136,7 +136,7 @@ define void @local_sextload_v4i32_to_v4i
}
; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
-define void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
%ext = zext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -144,7 +144,7 @@ define void @local_zextload_v8i32_to_v8i
}
; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
-define void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
%ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
%ext = sext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -152,7 +152,7 @@ define void @local_sextload_v8i32_to_v8i
}
; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
-define void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
%ext = sext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -160,7 +160,7 @@ define void @local_sextload_v16i32_to_v1
}
; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64
-define void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
%ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
%ext = zext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -168,7 +168,7 @@ define void @local_zextload_v16i32_to_v1
}
; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
-define void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
%ext = sext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -176,7 +176,7 @@ define void @local_sextload_v32i32_to_v3
}
; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
-define void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
%ext = zext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 {
%ld = load i64, i64 addrspace(3)* %in
store i64 %ld, i64 addrspace(3)* %out
ret void
@@ -22,7 +22,7 @@ define void @local_load_i64(i64 addrspac
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 {
entry:
%ld = load <2 x i64>, <2 x i64> addrspace(3)* %in
store <2 x i64> %ld, <2 x i64> addrspace(3)* %out
@@ -39,7 +39,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 {
entry:
%ld = load <3 x i64>, <3 x i64> addrspace(3)* %in
store <3 x i64> %ld, <3 x i64> addrspace(3)* %out
@@ -59,7 +59,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 {
entry:
%ld = load <4 x i64>, <4 x i64> addrspace(3)* %in
store <4 x i64> %ld, <4 x i64> addrspace(3)* %out
@@ -88,7 +88,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 {
entry:
%ld = load <8 x i64>, <8 x i64> addrspace(3)* %in
store <8 x i64> %ld, <8 x i64> addrspace(3)* %out
@@ -144,7 +144,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 {
entry:
%ld = load <16 x i64>, <16 x i64> addrspace(3)* %in
store <16 x i64> %ld, <16 x i64> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN: ds_read_u8
; EG: LDS_UBYTE_READ_RET
-define void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
entry:
%ld = load i8, i8 addrspace(3)* %in
store i8 %ld, i8 addrspace(3)* %out
@@ -22,7 +22,7 @@ entry:
; GCN: ds_read_u16
; EG: LDS_USHORT_READ_RET
-define void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
@@ -33,7 +33,7 @@ entry:
; GCN: ds_read_b32
; EG: DS_READ_RET
-define void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
@@ -44,7 +44,7 @@ entry:
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
@@ -56,7 +56,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
@@ -71,7 +71,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
@@ -84,7 +84,7 @@ entry:
; GCN: ds_read_u8
; EG: LDS_UBYTE_READ_RET
-define void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
%a = load i8, i8 addrspace(3)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(3)* %out
@@ -98,7 +98,7 @@ define void @local_zextload_i8_to_i32(i3
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
-define void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
%ld = load i8, i8 addrspace(3)* %in
%ext = sext i8 %ld to i32
store i32 %ext, i32 addrspace(3)* %out
@@ -108,7 +108,7 @@ define void @local_sextload_i8_to_i32(i3
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
; EG: LDS_UBYTE_READ_RET
-define void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(3)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
@@ -119,7 +119,7 @@ define void @local_zextload_v1i8_to_v1i3
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
-define void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(3)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
@@ -130,7 +130,7 @@ define void @local_sextload_v1i8_to_v1i3
; GCN: ds_read_u16
; EG: LDS_USHORT_READ_RET
-define void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
@@ -156,7 +156,7 @@ define void @local_zextload_v2i8_to_v2i3
; EG: LDS_USHORT_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
@@ -172,7 +172,7 @@ define void @local_sextload_v2i8_to_v2i3
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
; EG: LDS_READ_RET
-define void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
%ext = zext <3 x i8> %ld to <3 x i32>
@@ -197,7 +197,7 @@ entry:
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
%ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
%ext = sext <3 x i8> %ld to <3 x i32>
@@ -214,7 +214,7 @@ entry:
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
-define void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(3)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
@@ -231,7 +231,7 @@ define void @local_zextload_v4i8_to_v4i3
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(3)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
@@ -248,7 +248,7 @@ define void @local_sextload_v4i8_to_v4i3
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
-define void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(3)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
@@ -267,7 +267,7 @@ define void @local_zextload_v8i8_to_v8i3
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(3)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
@@ -292,7 +292,7 @@ define void @local_sextload_v8i8_to_v8i3
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
-define void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(3)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
@@ -321,7 +321,7 @@ define void @local_zextload_v16i8_to_v16
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(3)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
@@ -338,7 +338,7 @@ define void @local_sextload_v16i8_to_v16
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(3)* %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
@@ -355,7 +355,7 @@ define void @local_zextload_v32i8_to_v32
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(3)* %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
@@ -380,7 +380,7 @@ define void @local_sextload_v32i8_to_v32
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(3)* %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
@@ -405,7 +405,7 @@ define void @local_zextload_v64i8_to_v64
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
%load = load <64 x i8>, <64 x i8> addrspace(3)* %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
@@ -420,7 +420,7 @@ define void @local_sextload_v64i8_to_v64
; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; EG: 0.0
-define void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
%a = load i8, i8 addrspace(3)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -437,7 +437,7 @@ define void @local_zextload_i8_to_i64(i6
; EG: ASHR
; TODO: why not 7?
; EG: 31
-define void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
%a = load i8, i8 addrspace(3)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(3)* %out
@@ -450,7 +450,7 @@ define void @local_sextload_i8_to_i64(i6
; EG: MOV {{.*}}, literal
; TODO: merge?
; EG: 0.0
-define void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(3)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -463,7 +463,7 @@ define void @local_zextload_v1i8_to_v1i6
; EG: ASHR
; TODO: why not 7?
; EG: 31
-define void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(3)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
@@ -473,7 +473,7 @@ define void @local_sextload_v1i8_to_v1i6
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
; EG: LDS_USHORT_READ_RET
-define void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -485,7 +485,7 @@ define void @local_zextload_v2i8_to_v2i6
; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
-define void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
@@ -495,7 +495,7 @@ define void @local_sextload_v2i8_to_v2i6
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
; EG: LDS_READ_RET
-define void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(3)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -505,7 +505,7 @@ define void @local_zextload_v4i8_to_v4i6
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
; EG: LDS_READ_RET
-define void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(3)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
@@ -516,7 +516,7 @@ define void @local_sextload_v4i8_to_v4i6
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(3)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -536,7 +536,7 @@ define void @local_zextload_v8i8_to_v8i6
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(3)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
@@ -549,7 +549,7 @@ define void @local_sextload_v8i8_to_v8i6
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(3)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -562,7 +562,7 @@ define void @local_zextload_v16i8_to_v16
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(3)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
@@ -579,7 +579,7 @@ define void @local_sextload_v16i8_to_v16
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(3)* %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -596,7 +596,7 @@ define void @local_zextload_v32i8_to_v32
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(3)* %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
@@ -604,7 +604,7 @@ define void @local_sextload_v32i8_to_v32
}
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
-; define void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
@@ -612,7 +612,7 @@ define void @local_sextload_v32i8_to_v32
; }
; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
-; define void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
@@ -625,7 +625,7 @@ define void @local_sextload_v32i8_to_v32
; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
-define void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
%a = load i8, i8 addrspace(3)* %in
%ext = zext i8 %a to i16
store i16 %ext, i16 addrspace(3)* %out
@@ -639,7 +639,7 @@ define void @local_zextload_i8_to_i16(i1
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
-define void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
%a = load i8, i8 addrspace(3)* %in
%ext = sext i8 %a to i16
store i16 %ext, i16 addrspace(3)* %out
@@ -650,7 +650,7 @@ define void @local_sextload_i8_to_i16(i1
; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
-define void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(3)* %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
@@ -662,7 +662,7 @@ define void @local_zextload_v1i8_to_v1i1
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
-define void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
%load = load <1 x i8>, <1 x i8> addrspace(3)* %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
@@ -673,7 +673,7 @@ define void @local_sextload_v1i8_to_v1i1
; EG: LDS_USHORT_READ_RET
; EG: LDS_WRITE
-define void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
@@ -686,7 +686,7 @@ define void @local_zextload_v2i8_to_v2i1
; EG: BFE_INT
; EG: BFE_INT
; EG: LDS_WRITE
-define void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
@@ -698,7 +698,7 @@ define void @local_sextload_v2i8_to_v2i1
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(3)* %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
@@ -715,7 +715,7 @@ define void @local_zextload_v4i8_to_v4i1
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
%load = load <4 x i8>, <4 x i8> addrspace(3)* %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
@@ -730,7 +730,7 @@ define void @local_sextload_v4i8_to_v4i1
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(3)* %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
@@ -754,7 +754,7 @@ define void @local_zextload_v8i8_to_v8i1
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
%load = load <8 x i8>, <8 x i8> addrspace(3)* %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
@@ -775,7 +775,7 @@ define void @local_sextload_v8i8_to_v8i1
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(3)* %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
@@ -813,7 +813,7 @@ define void @local_zextload_v16i8_to_v16
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
%load = load <16 x i8>, <16 x i8> addrspace(3)* %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
@@ -846,7 +846,7 @@ define void @local_sextload_v16i8_to_v16
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(3)* %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
@@ -908,7 +908,7 @@ define void @local_zextload_v32i8_to_v32
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
%load = load <32 x i8>, <32 x i8> addrspace(3)* %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
@@ -916,7 +916,7 @@ define void @local_sextload_v32i8_to_v32
}
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
-; define void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
@@ -924,7 +924,7 @@ define void @local_sextload_v32i8_to_v32
; }
; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
-; define void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-weird-sizes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-weird-sizes.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-weird-sizes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-weird-sizes.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; SI: {{flat|buffer}}_load_ubyte
; SI: {{flat|buffer}}_load_ushort
; SI: {{flat|buffer}}_store_dword
-define void @load_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @load_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) #0 {
%1 = load i24, i24 addrspace(1)* %in
%2 = zext i24 %1 to i32
store i32 %2, i32 addrspace(1)* %out
@@ -21,7 +21,7 @@ define void @load_i24(i32 addrspace(1)*
; CI-HSA: flat_load_dword [[VAL:v[0-9]+]]
; CI-HSA: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VAL]]
-define void @load_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @load_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) #0 {
%1 = load i25, i25 addrspace(1)* %in
%2 = zext i25 %1 to i32
store i32 %2, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-64.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; BOTH-LABEL: {{^}}local_i32_load
; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28
; BOTH: buffer_store_dword [[REG]],
-define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
%val = load i32, i32 addrspace(3)* %gep, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -15,7 +15,7 @@ define void @local_i32_load(i32 addrspac
; BOTH-LABEL: {{^}}local_i32_load_0_offset
; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
; BOTH: buffer_store_dword [[REG]],
-define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%val = load i32, i32 addrspace(3)* %in, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
@@ -25,7 +25,7 @@ define void @local_i32_load_0_offset(i32
; BOTH-NOT: ADD
; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535
; BOTH: buffer_store_byte [[REG]],
-define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
%val = load i8, i8 addrspace(3)* %gep, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
@@ -40,7 +40,7 @@ define void @local_i8_load_i16_max_offse
; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]]
; BOTH: buffer_store_byte [[REG]],
-define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
%val = load i8, i8 addrspace(3)* %gep, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
@@ -51,7 +51,7 @@ define void @local_i8_load_over_i16_max_
; BOTH-NOT: ADD
; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56
; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
%val = load i64, i64 addrspace(3)* %gep, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -61,7 +61,7 @@ define void @local_i64_load(i64 addrspac
; BOTH-LABEL: {{^}}local_i64_load_0_offset
; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%val = load i64, i64 addrspace(3)* %in, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
@@ -71,7 +71,7 @@ define void @local_i64_load_0_offset(i64
; BOTH-NOT: ADD
; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56
; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%gep = getelementptr double, double addrspace(3)* %in, i32 7
%val = load double, double addrspace(3)* %gep, align 8
store double %val, double addrspace(1)* %out, align 8
@@ -81,7 +81,7 @@ define void @local_f64_load(double addrs
; BOTH-LABEL: {{^}}local_f64_load_0_offset
; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; BOTH: buffer_store_dwordx2 [[REG]],
-define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
+define amdgpu_kernel void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%val = load double, double addrspace(3)* %in, align 8
store double %val, double addrspace(1)* %out, align 8
ret void
@@ -90,7 +90,7 @@ define void @local_f64_load_0_offset(dou
; BOTH-LABEL: {{^}}local_i64_store:
; BOTH-NOT: ADD
; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
-define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_i64_store(i64 addrspace(3)* %out) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
ret void
@@ -99,7 +99,7 @@ define void @local_i64_store(i64 addrspa
; BOTH-LABEL: {{^}}local_i64_store_0_offset:
; BOTH-NOT: ADD
; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
}
@@ -107,7 +107,7 @@ define void @local_i64_store_0_offset(i6
; BOTH-LABEL: {{^}}local_f64_store:
; BOTH-NOT: ADD
; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
-define void @local_f64_store(double addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_f64_store(double addrspace(3)* %out) nounwind {
%gep = getelementptr double, double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
ret void
@@ -115,7 +115,7 @@ define void @local_f64_store(double addr
; BOTH-LABEL: {{^}}local_f64_store_0_offset
; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
}
@@ -124,7 +124,7 @@ define void @local_f64_store_0_offset(do
; BOTH-NOT: ADD
; BOTH: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
; BOTH: s_endpgm
-define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
ret void
@@ -134,7 +134,7 @@ define void @local_v2i64_store(<2 x i64>
; BOTH-NOT: ADD
; BOTH: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; BOTH: s_endpgm
-define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
ret void
}
@@ -144,7 +144,7 @@ define void @local_v2i64_store_0_offset(
; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
; BOTH: s_endpgm
-define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
ret void
@@ -155,7 +155,7 @@ define void @local_v4i64_store(<4 x i64>
; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; BOTH: s_endpgm
-define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
+define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-atomics.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-atomics.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -21,7 +21,7 @@ define void @lds_atomic_xchg_ret_i32(i32
; EG: LDS_WRXCHG_RET *
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -37,7 +37,7 @@ define void @lds_atomic_xchg_ret_i32_off
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
-define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -47,7 +47,7 @@ define void @lds_atomic_add_ret_i32(i32
; EG: LDS_ADD_RET *
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -59,7 +59,7 @@ define void @lds_atomic_add_ret_i32_offs
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
@@ -73,7 +73,7 @@ define void @lds_atomic_add_ret_i32_bad_
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
-define void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -84,7 +84,7 @@ define void @lds_atomic_add1_ret_i32(i32
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
-define void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -96,7 +96,7 @@ define void @lds_atomic_add1_ret_i32_off
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
@@ -109,7 +109,7 @@ define void @lds_atomic_add1_ret_i32_bad
; EG: LDS_SUB_RET *
; GCN: ds_sub_rtn_u32
; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -119,7 +119,7 @@ define void @lds_atomic_sub_ret_i32(i32
; EG: LDS_SUB_RET *
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -131,7 +131,7 @@ define void @lds_atomic_sub_ret_i32_offs
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
-define void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -142,7 +142,7 @@ define void @lds_atomic_sub1_ret_i32(i32
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
-define void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -153,7 +153,7 @@ define void @lds_atomic_sub1_ret_i32_off
; EG: LDS_AND_RET *
; GCN: ds_and_rtn_b32
; GCN: s_endpgm
-define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -163,7 +163,7 @@ define void @lds_atomic_and_ret_i32(i32
; EG: LDS_AND_RET *
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -174,7 +174,7 @@ define void @lds_atomic_and_ret_i32_offs
; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32
; GCN: s_endpgm
-define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -184,7 +184,7 @@ define void @lds_atomic_or_ret_i32(i32 a
; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -195,7 +195,7 @@ define void @lds_atomic_or_ret_i32_offse
; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32
; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -205,7 +205,7 @@ define void @lds_atomic_xor_ret_i32(i32
; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -214,7 +214,7 @@ define void @lds_atomic_xor_ret_i32_offs
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
-; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
; store i32 %result, i32 addrspace(1)* %out, align 4
; ret void
@@ -224,7 +224,7 @@ define void @lds_atomic_xor_ret_i32_offs
; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32
; GCN: s_endpgm
-define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -234,7 +234,7 @@ define void @lds_atomic_min_ret_i32(i32
; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -245,7 +245,7 @@ define void @lds_atomic_min_ret_i32_offs
; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32
; GCN: s_endpgm
-define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -255,7 +255,7 @@ define void @lds_atomic_max_ret_i32(i32
; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -266,7 +266,7 @@ define void @lds_atomic_max_ret_i32_offs
; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32
; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -276,7 +276,7 @@ define void @lds_atomic_umin_ret_i32(i32
; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -287,7 +287,7 @@ define void @lds_atomic_umin_ret_i32_off
; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32
; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -297,7 +297,7 @@ define void @lds_atomic_umax_ret_i32(i32
; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -310,7 +310,7 @@ define void @lds_atomic_umax_ret_i32_off
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -318,7 +318,7 @@ define void @lds_atomic_xchg_noret_i32(i
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -330,7 +330,7 @@ define void @lds_atomic_xchg_noret_i32_o
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_u32 [[VPTR]], [[DATA]]
; GCN: s_endpgm
-define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -338,7 +338,7 @@ define void @lds_atomic_add_noret_i32(i3
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -348,7 +348,7 @@ define void @lds_atomic_add_noret_i32_of
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
@@ -360,7 +360,7 @@ define void @lds_atomic_add_noret_i32_ba
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
-define void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
@@ -369,7 +369,7 @@ define void @lds_atomic_add1_noret_i32(i
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
-define void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
@@ -379,7 +379,7 @@ define void @lds_atomic_add1_noret_i32_o
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
@@ -390,7 +390,7 @@ define void @lds_atomic_add1_noret_i32_b
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
; GCN: ds_sub_u32
; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -398,7 +398,7 @@ define void @lds_atomic_sub_noret_i32(i3
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -408,7 +408,7 @@ define void @lds_atomic_sub_noret_i32_of
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
-define void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
@@ -417,7 +417,7 @@ define void @lds_atomic_sub1_noret_i32(i
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
-define void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
@@ -426,7 +426,7 @@ define void @lds_atomic_sub1_noret_i32_o
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
; GCN: ds_and_b32
; GCN: s_endpgm
-define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -434,7 +434,7 @@ define void @lds_atomic_and_noret_i32(i3
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -443,7 +443,7 @@ define void @lds_atomic_and_noret_i32_of
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
; GCN: ds_or_b32
; GCN: s_endpgm
-define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -451,7 +451,7 @@ define void @lds_atomic_or_noret_i32(i32
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -460,7 +460,7 @@ define void @lds_atomic_or_noret_i32_off
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
; GCN: ds_xor_b32
; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -468,7 +468,7 @@ define void @lds_atomic_xor_noret_i32(i3
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -476,7 +476,7 @@ define void @lds_atomic_xor_noret_i32_of
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
-; define void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
; ret void
; }
@@ -484,7 +484,7 @@ define void @lds_atomic_xor_noret_i32_of
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
; GCN: ds_min_i32
; GCN: s_endpgm
-define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -492,7 +492,7 @@ define void @lds_atomic_min_noret_i32(i3
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -501,7 +501,7 @@ define void @lds_atomic_min_noret_i32_of
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
; GCN: ds_max_i32
; GCN: s_endpgm
-define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -509,7 +509,7 @@ define void @lds_atomic_max_noret_i32(i3
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -518,7 +518,7 @@ define void @lds_atomic_max_noret_i32_of
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
; GCN: ds_min_u32
; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -526,7 +526,7 @@ define void @lds_atomic_umin_noret_i32(i
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
@@ -535,7 +535,7 @@ define void @lds_atomic_umin_noret_i32_o
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
; GCN: ds_max_u32
; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
@@ -543,7 +543,7 @@ define void @lds_atomic_umax_noret_i32(i
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-atomics64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-atomics64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-atomics64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-atomics64.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -13,7 +13,7 @@ define void @lds_atomic_xchg_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -23,7 +23,7 @@ define void @lds_atomic_xchg_ret_i64_off
; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; GCN: ds_add_rtn_u64
; GCN: s_endpgm
-define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -38,7 +38,7 @@ define void @lds_atomic_add_ret_i64(i64
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
-define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -51,7 +51,7 @@ define void @lds_atomic_add_ret_i64_offs
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
-define void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -60,7 +60,7 @@ define void @lds_atomic_add1_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; GCN: ds_add_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -70,7 +70,7 @@ define void @lds_atomic_add1_ret_i64_off
; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -79,7 +79,7 @@ define void @lds_atomic_sub_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -92,7 +92,7 @@ define void @lds_atomic_sub_ret_i64_offs
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
-define void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -101,7 +101,7 @@ define void @lds_atomic_sub1_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -111,7 +111,7 @@ define void @lds_atomic_sub1_ret_i64_off
; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; GCN: ds_and_rtn_b64
; GCN: s_endpgm
-define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -120,7 +120,7 @@ define void @lds_atomic_and_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -130,7 +130,7 @@ define void @lds_atomic_and_ret_i64_offs
; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; GCN: ds_or_rtn_b64
; GCN: s_endpgm
-define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -139,7 +139,7 @@ define void @lds_atomic_or_ret_i64(i64 a
; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -149,7 +149,7 @@ define void @lds_atomic_or_ret_i64_offse
; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -158,7 +158,7 @@ define void @lds_atomic_xor_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -167,7 +167,7 @@ define void @lds_atomic_xor_ret_i64_offs
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
-; define void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
; %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
; store i64 %result, i64 addrspace(1)* %out, align 8
; ret void
@@ -176,7 +176,7 @@ define void @lds_atomic_xor_ret_i64_offs
; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; GCN: ds_min_rtn_i64
; GCN: s_endpgm
-define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -185,7 +185,7 @@ define void @lds_atomic_min_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -195,7 +195,7 @@ define void @lds_atomic_min_ret_i64_offs
; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; GCN: ds_max_rtn_i64
; GCN: s_endpgm
-define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -204,7 +204,7 @@ define void @lds_atomic_max_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -214,7 +214,7 @@ define void @lds_atomic_max_ret_i64_offs
; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; GCN: ds_min_rtn_u64
; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -223,7 +223,7 @@ define void @lds_atomic_umin_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -233,7 +233,7 @@ define void @lds_atomic_umin_ret_i64_off
; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; GCN: ds_max_rtn_u64
; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -242,7 +242,7 @@ define void @lds_atomic_umax_ret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -252,7 +252,7 @@ define void @lds_atomic_umax_ret_i64_off
; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -260,7 +260,7 @@ define void @lds_atomic_xchg_noret_i64(i
; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -269,7 +269,7 @@ define void @lds_atomic_xchg_noret_i64_o
; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; GCN: ds_add_u64
; GCN: s_endpgm
-define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -282,7 +282,7 @@ define void @lds_atomic_add_noret_i64(i6
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
ret void
@@ -293,7 +293,7 @@ define void @lds_atomic_add_noret_i64_of
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
-define void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
@@ -301,7 +301,7 @@ define void @lds_atomic_add1_noret_i64(i
; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; GCN: ds_add_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
ret void
@@ -310,7 +310,7 @@ define void @lds_atomic_add1_noret_i64_o
; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; GCN: ds_sub_u64
; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -318,7 +318,7 @@ define void @lds_atomic_sub_noret_i64(i6
; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -329,7 +329,7 @@ define void @lds_atomic_sub_noret_i64_of
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
-define void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
@@ -337,7 +337,7 @@ define void @lds_atomic_sub1_noret_i64(i
; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
ret void
@@ -346,7 +346,7 @@ define void @lds_atomic_sub1_noret_i64_o
; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; GCN: ds_and_b64
; GCN: s_endpgm
-define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -354,7 +354,7 @@ define void @lds_atomic_and_noret_i64(i6
; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -363,7 +363,7 @@ define void @lds_atomic_and_noret_i64_of
; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; GCN: ds_or_b64
; GCN: s_endpgm
-define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -371,7 +371,7 @@ define void @lds_atomic_or_noret_i64(i64
; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -380,7 +380,7 @@ define void @lds_atomic_or_noret_i64_off
; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; GCN: ds_xor_b64
; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -388,7 +388,7 @@ define void @lds_atomic_xor_noret_i64(i6
; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -396,7 +396,7 @@ define void @lds_atomic_xor_noret_i64_of
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
-; define void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
; ret void
; }
@@ -404,7 +404,7 @@ define void @lds_atomic_xor_noret_i64_of
; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; GCN: ds_min_i64
; GCN: s_endpgm
-define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -412,7 +412,7 @@ define void @lds_atomic_min_noret_i64(i6
; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -421,7 +421,7 @@ define void @lds_atomic_min_noret_i64_of
; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; GCN: ds_max_i64
; GCN: s_endpgm
-define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -429,7 +429,7 @@ define void @lds_atomic_max_noret_i64(i6
; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -438,7 +438,7 @@ define void @lds_atomic_max_noret_i64_of
; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; GCN: ds_min_u64
; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -446,7 +446,7 @@ define void @lds_atomic_umin_noret_i64(i
; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
@@ -455,7 +455,7 @@ define void @lds_atomic_umin_noret_i64_o
; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; GCN: ds_max_u64
; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
@@ -463,7 +463,7 @@ define void @lds_atomic_umax_noret_i64(i
; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
-define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-memory.amdgcn.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-memory.amdgcn.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-memory.amdgcn.ll Tue Mar 21 16:39:51 2017
@@ -17,7 +17,7 @@
; GCN: s_barrier
; GCN: ds_read_b32 {{v[0-9]+}},
-define void @local_memory(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 {
entry:
%y.i = call i32 @llvm.amdgcn.workitem.id.x() #1
%arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
@@ -61,7 +61,7 @@ entry:
; CI: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 0, [[ADDRW]]
; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, [[SUB]] offset0:3 offset1:7
-define void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
entry:
%x.i = call i32 @llvm.amdgcn.workitem.id.x()
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll Tue Mar 21 16:39:51 2017
@@ -14,7 +14,7 @@
; GCN: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
; R600: LDS_READ_RET
-define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
entry:
%tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
%tmp1 = load i32, i32 addrspace(3)* %tmp0
@@ -30,7 +30,7 @@ entry:
; R600: LDS_READ_RET
; GCN-DAG: ds_read_b32
; GCN-DAG: ds_read2_b32
-define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
%scalar = load i32, i32 addrspace(3)* %in
%tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
%vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-memory.r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-memory.r600.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-memory.r600.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-memory.r600.ll Tue Mar 21 16:39:51 2017
@@ -15,7 +15,7 @@
; EG-NEXT: ALU clause
; EG: LDS_READ_RET
-define void @local_memory(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 {
entry:
%y.i = call i32 @llvm.r600.read.tidig.x() #1
%arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
@@ -57,7 +57,7 @@ entry:
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-define void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
entry:
%x.i = call i32 @llvm.r600.read.tidig.x() #1
%arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
Modified: llvm/trunk/test/CodeGen/AMDGPU/loop-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop-address.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop-address.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop-address.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
;CHECK: LOOP_BREAK @10
;CHECK: POP @10
-define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
+define amdgpu_kernel void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
entry:
%cmp5 = icmp sgt i32 %iterations, 0
br i1 %cmp5, label %for.body, label %for.end
Modified: llvm/trunk/test/CodeGen/AMDGPU/loop-idiom.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop-idiom.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop-idiom.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop-idiom.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; FUNC: @no_memcpy
; R600-NOT: {{^}}llvm.memcpy
; SI-NOT: {{^}}llvm.memcpy
-define void @no_memcpy(i8 addrspace(3)* %in, i32 %size) {
+define amdgpu_kernel void @no_memcpy(i8 addrspace(3)* %in, i32 %size) {
entry:
%dest = alloca i8, i32 32
br label %for.body
@@ -33,7 +33,7 @@ for.end:
; R600-NOT: {{^}}memset_pattern16:
; SI-NOT: {{^}}llvm.memset
; SI-NOT: {{^}}memset_pattern16:
-define void @no_memset(i32 %size) {
+define amdgpu_kernel void @no_memset(i32 %size) {
entry:
%dest = alloca i8, i32 32
br label %for.body
Modified: llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll Tue Mar 21 16:39:51 2017
@@ -43,7 +43,7 @@
; GCN: ; BB#4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_endpgm
-define void @break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
@@ -87,7 +87,7 @@ bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
-define void @undef_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
@@ -140,7 +140,7 @@ bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
-define void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
@@ -190,7 +190,7 @@ bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
-define void @true_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
@@ -240,7 +240,7 @@ bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
-define void @false_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
@@ -295,7 +295,7 @@ bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
-define void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
Modified: llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare void @llvm.memset.p1i8.i64(i8 ad
; Test the upper bound for sizes to leave
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
-define void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
+define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
ret void
}
@@ -21,14 +21,14 @@ define void @max_size_small_static_memcp
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
-define void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
+define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
ret void
}
; OPT-LABEL: @max_size_small_static_memmove_caller0(
; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
-define void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
+define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
ret void
}
@@ -39,14 +39,14 @@ define void @max_size_small_static_memmo
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
-define void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
+define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
ret void
}
; OPT-LABEL: @max_size_small_static_memset_caller0(
; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
-define void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
+define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
ret void
}
@@ -55,7 +55,7 @@ define void @max_size_small_static_memse
; OPT-NOT: call
; OPT: getelementptr
; OPT: store i8
-define void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
+define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false)
ret void
}
@@ -63,7 +63,7 @@ define void @min_size_large_static_memse
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NOT: call
; OPT: phi
-define void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
+define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
ret void
}
@@ -71,7 +71,7 @@ define void @variable_memcpy_caller0(i8
; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NOT: call
; OPT: phi
-define void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
+define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
ret void
}
@@ -82,7 +82,7 @@ define void @variable_memcpy_caller1(i8
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
-define void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false)
ret void
@@ -94,7 +94,7 @@ define void @memcpy_multi_use_one_functi
; OPT: load i8, i8 addrspace(3)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
-define void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false)
ret void
}
@@ -107,7 +107,7 @@ define void @memcpy_alt_type(i8 addrspac
; OPT: store i8
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
-define void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range:
; CHECK-NOT: v0
; CHECK: {{flat|buffer}}_store_dword {{.*}}v0
-define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
+define amdgpu_kernel void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
%and = and i32 %id, 1023
@@ -16,7 +16,7 @@ entry:
; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range:
; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0
; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]]
-define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 {
+define amdgpu_kernel void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
%and = and i32 %id, 511
@@ -28,7 +28,7 @@ entry:
; CHECK-NOT: v0
; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0
; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]]
-define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 {
+define amdgpu_kernel void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1
%and = and i32 %id, 255
Modified: llvm/trunk/test/CodeGen/AMDGPU/lshr.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lshr.v2i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lshr.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lshr.v2i16.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; CIVI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CIVI: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 16
; CIVI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
%result = lshr <2 x i16> %lhs, %rhs
store <2 x i16> %result, <2 x i16> addrspace(1)* %out
ret void
@@ -38,7 +38,7 @@ define void @s_lshr_v2i16(<2 x i16> addr
; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -55,7 +55,7 @@ define void @v_lshr_v2i16(<2 x i16> addr
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -70,7 +70,7 @@ define void @lshr_v_s_v2i16(<2 x i16> ad
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -84,7 +84,7 @@ define void @lshr_s_v_v2i16(<2 x i16> ad
; GCN-LABEL: {{^}}lshr_imm_v_v2i16:
; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], 8
-define void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -98,7 +98,7 @@ define void @lshr_imm_v_v2i16(<2 x i16>
; GCN-LABEL: {{^}}lshr_v_imm_v2i16:
; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], 8, [[LHS]]
-define void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -115,7 +115,7 @@ define void @lshr_v_imm_v2i16(<2 x i16>
; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dwordx2
-define void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -133,7 +133,7 @@ define void @v_lshr_v4i16(<4 x i16> addr
; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dwordx2
-define void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad-combine.ll Tue Mar 21 16:39:51 2017
@@ -31,7 +31,7 @@ declare float @llvm.fmuladd.f32(float, f
; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
-define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -70,7 +70,7 @@ define void @combine_to_mad_f32_0(float
; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -107,7 +107,7 @@ define void @combine_to_mad_f32_0_2use(f
; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
-define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -137,7 +137,7 @@ define void @combine_to_mad_f32_1(float
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -174,7 +174,7 @@ define void @combine_to_mad_fsub_0_f32(f
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -209,7 +209,7 @@ define void @combine_to_mad_fsub_0_f32_2
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -245,7 +245,7 @@ define void @combine_to_mad_fsub_1_f32(f
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -281,7 +281,7 @@ define void @combine_to_mad_fsub_1_f32_2
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI: buffer_store_dword [[RESULT]]
-define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -319,7 +319,7 @@ define void @combine_to_mad_fsub_2_f32(f
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -362,7 +362,7 @@ define void @combine_to_mad_fsub_2_f32_2
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -404,7 +404,7 @@ define void @combine_to_mad_fsub_2_f32_2
; SI-DENORM: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -447,7 +447,7 @@ define void @aggressive_combine_to_mad_f
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
-define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -497,7 +497,7 @@ define void @aggressive_combine_to_mad_f
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
-define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -548,7 +548,7 @@ define void @aggressive_combine_to_mad_f
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
-define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad24-get-global-id.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@ declare i8 addrspace(2)* @llvm.amdgcn.di
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]]
; GCN: v_mad_u32_u24 v{{[0-9]+}}, [[VWGSIZEX]], s8, v0
-define void @get_global_id_0(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @get_global_id_0(i32 addrspace(1)* %out) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%gep = getelementptr inbounds i32, i32 addrspace(2)* %cast.dispatch.ptr, i64 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; CM: MULADD_INT24
; SI-NOT: and
; SI: v_mad_i32_i24
-define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = shl i32 %a, 8
%a_24 = ashr i32 %0, 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@ declare i32 @llvm.r600.read.tidig.x() no
; SI: v_mad_u32_u24
; VI: v_mad_u32_u24
-define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = shl i32 %a, 8
%a_24 = lshr i32 %0, 8
@@ -32,7 +32,7 @@ entry:
; FIXME: Should be using scalar instructions here.
; GCN: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; GCN: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16
-define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+define amdgpu_kernel void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
entry:
%0 = mul i16 %a, %b
%1 = add i16 %0, %c
@@ -49,7 +49,7 @@ entry:
; EG: 8
; GCN: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
-define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+define amdgpu_kernel void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
entry:
%0 = mul i8 %a, %b
%1 = add i8 %0, %c
@@ -68,7 +68,7 @@ entry:
; FUNC-LABEL: {{^}}i24_i32_i32_mad:
; EG: CNDE_INT
; SI: v_cndmask
-define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
%0 = ashr i32 %a, 8
%1 = icmp ne i32 %c, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madak.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@ declare float @llvm.fabs.f32(float) noun
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -37,7 +37,7 @@ define void @madak_f32(float addrspace(1
; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VB]], [[VA]], [[VK]]
; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]]
; GCN: s_endpgm
-define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -64,7 +64,7 @@ define void @madak_2_use_f32(float addrs
; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
+define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -84,7 +84,7 @@ define void @madak_m_inline_imm_f32(floa
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -106,7 +106,7 @@ define void @madak_inline_imm_f32(float
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
-define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
+define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -125,7 +125,7 @@ define void @s_v_madak_f32(float addrspa
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
; GCN-NOT: v_madak_f32
; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
-define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -141,7 +141,7 @@ define void @v_s_madak_f32(float addrspa
; GCN-LABEL: {{^}}s_s_madak_f32:
; GCN-NOT: v_madak_f32
; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
store float %madak, float addrspace(1)* %out, align 4
@@ -153,7 +153,7 @@ define void @s_s_madak_f32(float addrspa
; GCN: buffer_load_dword [[VB:v[0-9]+]]
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
; GCN: s_endpgm
-define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -175,7 +175,7 @@ define void @no_madak_src0_modifier_f32(
; GCN: buffer_load_dword [[VB:v[0-9]+]]
; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
; GCN: s_endpgm
-define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -201,7 +201,7 @@ define void @no_madak_src1_modifier_f32(
; GCN: v_madak_f32_e32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]]
; GCN: buffer_store_dword [[MUL]]
-define void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
+define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
bb:
%tmp = icmp eq i32 %arg1, 0
br i1 %tmp, label %bb3, label %bb4
Modified: llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madmk.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madmk.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madmk.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@ declare float @llvm.fabs.f32(float) noun
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
-define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -35,7 +35,7 @@ define void @madmk_f32(float addrspace(1
; GCN-DAG: v_mac_f32_e32 [[VB]], [[VK]], [[VA]]
; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]]
; GCN: s_endpgm
-define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -64,7 +64,7 @@ define void @madmk_2_use_f32(float addrs
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
-define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -83,7 +83,7 @@ define void @madmk_inline_imm_f32(float
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
-define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
+define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -97,7 +97,7 @@ define void @s_s_madmk_f32(float addrspa
; GCN-NOT: v_madmk_f32
; GCN: v_mad_f32
; GCN: s_endpgm
-define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
+define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -113,7 +113,7 @@ define void @v_s_madmk_f32(float addrspa
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
-define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
+define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -130,7 +130,7 @@ define void @scalar_vector_madmk_f32(flo
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], |[[VA]]|, [[VB]]
-define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -151,7 +151,7 @@ define void @no_madmk_src0_modifier_f32(
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
-define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -172,7 +172,7 @@ define void @no_madmk_src2_modifier_f32(
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
-define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -189,7 +189,7 @@ define void @madmk_add_inline_imm_f32(fl
; SI: s_xor_b64
; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
; SI: s_or_b64
-define void @kill_madmk_verifier_error() nounwind {
+define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
bb:
br label %bb2
Modified: llvm/trunk/test/CodeGen/AMDGPU/max.i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/max.i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/max.i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/max.i16.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_imax_sge_i16:
; VIPLUS: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
@@ -23,7 +23,7 @@ define void @v_test_imax_sge_i16(i16 add
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_test_imax_sge_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %aptr, <2 x i16> addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %aptr, <2 x i16> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %bptr, i32 %tid
@@ -45,7 +45,7 @@ define void @v_test_imax_sge_v2i16(<2 x
; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_test_imax_sge_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %aptr, <3 x i16> addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %aptr, <3 x i16> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %bptr, i32 %tid
@@ -67,7 +67,7 @@ define void @v_test_imax_sge_v3i16(<3 x
; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_max_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %bptr, i32 %tid
@@ -83,7 +83,7 @@ define void @v_test_imax_sge_v4i16(<4 x
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_imax_sgt_i16:
; VIPLUS: v_max_i16_e32
-define void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
@@ -99,7 +99,7 @@ define void @v_test_imax_sgt_i16(i16 add
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_umax_uge_i16:
; VIPLUS: v_max_u16_e32
-define void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
@@ -115,7 +115,7 @@ define void @v_test_umax_uge_i16(i16 add
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_umax_ugt_i16:
; VIPLUS: v_max_u16_e32
-define void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
@@ -133,7 +133,7 @@ define void @v_test_umax_ugt_i16(i16 add
; VI: v_max_u16_e32
; GFX9: v_pk_max_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_test_umax_ugt_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %aptr, <2 x i16> addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_umax_ugt_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %aptr, <2 x i16> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %bptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/max.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/max.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/max.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/max.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; SI: v_max_i32_e32
; EG: MAX_INT
-define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%cmp = icmp sge i32 %a, %b
@@ -26,7 +26,7 @@ define void @v_test_imax_sge_i32(i32 add
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
-define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
%a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
%b = load <4 x i32>, <4 x i32> addrspace(1)* %bptr, align 4
%cmp = icmp sge <4 x i32> %a, %b
@@ -39,7 +39,7 @@ define void @v_test_imax_sge_v4i32(<4 x
; SI: s_max_i32
; EG: MAX_INT
-define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -50,7 +50,7 @@ define void @s_test_imax_sge_i32(i32 add
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
-define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+define amdgpu_kernel void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
%cmp = icmp sge i32 %a, 9
%val = select i1 %cmp, i32 %a, i32 9
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -63,7 +63,7 @@ define void @s_test_imax_sge_imm_i32(i32
; SI: v_max_i32_e32
; EG: MAX_INT
-define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
%a = load i8, i8 addrspace(1)* %aptr, align 1
%b = load i8, i8 addrspace(1)* %bptr, align 1
%cmp = icmp sge i8 %a, %b
@@ -76,7 +76,7 @@ define void @v_test_imax_sge_i8(i8 addrs
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
-define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+define amdgpu_kernel void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
%cmp = icmp sgt i32 %a, 9
%val = select i1 %cmp, i32 %a, i32 9
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -89,7 +89,7 @@ define void @s_test_imax_sgt_imm_i32(i32
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
-define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
+define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
%cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
@@ -100,7 +100,7 @@ define void @s_test_imax_sgt_imm_v2i32(<
; SI: v_max_i32_e32
; EG: MAX_INT
-define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%cmp = icmp sgt i32 %a, %b
@@ -113,7 +113,7 @@ define void @v_test_imax_sgt_i32(i32 add
; SI: s_max_i32
; EG: MAX_INT
-define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sgt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -124,7 +124,7 @@ define void @s_test_imax_sgt_i32(i32 add
; SI: v_max_u32_e32
; EG: MAX_UINT
-define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%cmp = icmp uge i32 %a, %b
@@ -137,7 +137,7 @@ define void @v_test_umax_uge_i32(i32 add
; SI: s_max_u32
; EG: MAX_UINT
-define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp uge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -155,7 +155,7 @@ define void @s_test_umax_uge_i32(i32 add
; EG: MAX_UINT
; EG: MAX_UINT
; EG-NOT: MAX_UINT
-define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
+define amdgpu_kernel void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
%cmp = icmp uge <3 x i32> %a, %b
%val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
@@ -168,7 +168,7 @@ define void @s_test_umax_uge_v3i32(<3 x
; SI: v_max_u32_e32
; EG: MAX_UINT
-define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
%a = load i8, i8 addrspace(1)* %aptr, align 1
%b = load i8, i8 addrspace(1)* %bptr, align 1
%cmp = icmp uge i8 %a, %b
@@ -181,7 +181,7 @@ define void @v_test_umax_uge_i8(i8 addrs
; SI: v_max_u32_e32
; EG: MAX_UINT
-define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%cmp = icmp ugt i32 %a, %b
@@ -194,7 +194,7 @@ define void @v_test_umax_ugt_i32(i32 add
; SI: s_max_u32
; EG: MAX_UINT
-define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp ugt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -207,7 +207,7 @@ define void @s_test_umax_ugt_i32(i32 add
; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
-define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
+define amdgpu_kernel void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
%cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
@@ -223,7 +223,7 @@ define void @s_test_umax_ugt_imm_v2i32(<
; SI: buffer_store_dword [[VMAX]]
; EG: MAX_UINT
-define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
+define amdgpu_kernel void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
%a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32
%cmp = icmp ugt i32 %a.ext, %b.ext
@@ -243,7 +243,7 @@ define void @simplify_demanded_bits_test
; SI: buffer_store_dword [[VMAX]]
; EG: MAX_INT
-define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
+define amdgpu_kernel void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
%a.ext = sext i16 %a to i32
%b.ext = sext i16 %b to i32
%cmp = icmp sgt i32 %a.ext, %b.ext
@@ -262,7 +262,7 @@ define void @simplify_demanded_bits_test
; SI: s_max_i32
; EG: MAX_INT
-define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
+define amdgpu_kernel void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
%cmp = icmp sge i16 %a, %b
%val = select i1 %cmp, i16 %a, i16 %b
store i16 %val, i16 addrspace(1)* %out
@@ -275,7 +275,7 @@ define void @s_test_imax_sge_i16(i16 add
; EG: MAX_UINT
; EG: MAX_UINT
-define void @test_umax_ugt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @test_umax_ugt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%tmp = icmp ugt i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -287,7 +287,7 @@ define void @test_umax_ugt_i64(i64 addrs
; EG: MAX_UINT
; EG: MAX_UINT
-define void @test_umax_uge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @test_umax_uge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%tmp = icmp uge i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -299,7 +299,7 @@ define void @test_umax_uge_i64(i64 addrs
; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
-define void @test_imax_sgt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @test_imax_sgt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%tmp = icmp sgt i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -311,7 +311,7 @@ define void @test_imax_sgt_i64(i64 addrs
; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
-define void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%tmp = icmp sge i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/max3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/max3.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/max3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/max3.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; FUNC-LABEL: @v_test_imax3_sgt_i32
; SI: v_max3_i32
-define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -23,7 +23,7 @@ define void @v_test_imax3_sgt_i32(i32 ad
; FUNC-LABEL: @v_test_umax3_ugt_i32
; SI: v_max3_u32
-define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/mem-builtins.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mem-builtins.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mem-builtins.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mem-builtins.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @strcmp(i8* nocapture, i8* n
; ERROR: error: <unknown>:0:0: in function test_memcmp void (i8 addrspace(1)*, i8 addrspace(1)*, i32*): unsupported call to function memcmp
-define void @test_memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i32* nocapture %p) #0 {
+define amdgpu_kernel void @test_memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i32* nocapture %p) #0 {
entry:
%cmp = tail call i32 @memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i64 2)
store volatile i32 %cmp, i32 addrspace(1)* undef
@@ -17,35 +17,35 @@ entry:
}
; ERROR: error: <unknown>:0:0: in function test_memchr void (i8 addrspace(1)*, i32, i64): unsupported call to function memchr
-define void @test_memchr(i8 addrspace(1)* %src, i32 %char, i64 %len) #0 {
+define amdgpu_kernel void @test_memchr(i8 addrspace(1)* %src, i32 %char, i64 %len) #0 {
%res = call i8 addrspace(1)* @memchr(i8 addrspace(1)* %src, i32 %char, i64 %len)
store volatile i8 addrspace(1)* %res, i8 addrspace(1)* addrspace(1)* undef
ret void
}
; ERROR: error: <unknown>:0:0: in function test_strcpy void (i8*, i8*): unsupported call to function strcpy
-define void @test_strcpy(i8* %dst, i8* %src) #0 {
+define amdgpu_kernel void @test_strcpy(i8* %dst, i8* %src) #0 {
%res = call i8* @strcpy(i8* %dst, i8* %src)
store volatile i8* %res, i8* addrspace(1)* undef
ret void
}
; ERROR: error: <unknown>:0:0: in function test_strcmp void (i8*, i8*): unsupported call to function strcmp
-define void @test_strcmp(i8* %src0, i8* %src1) #0 {
+define amdgpu_kernel void @test_strcmp(i8* %src0, i8* %src1) #0 {
%res = call i32 @strcmp(i8* %src0, i8* %src1)
store volatile i32 %res, i32 addrspace(1)* undef
ret void
}
; ERROR: error: <unknown>:0:0: in function test_strlen void (i8*): unsupported call to function strlen
-define void @test_strlen(i8* %src) #0 {
+define amdgpu_kernel void @test_strlen(i8* %src) #0 {
%res = call i32 @strlen(i8* %src)
store volatile i32 %res, i32 addrspace(1)* undef
ret void
}
; ERROR: error: <unknown>:0:0: in function test_strnlen void (i8*, i32): unsupported call to function strnlen
-define void @test_strnlen(i8* %src, i32 %size) #0 {
+define amdgpu_kernel void @test_strnlen(i8* %src, i32 %size) #0 {
%res = call i32 @strnlen(i8* %src, i32 %size)
store volatile i32 %res, i32 addrspace(1)* undef
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
-define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 123, i8 addrspace(1)* %out.gep.1
@@ -25,7 +25,7 @@ define void @merge_global_store_2_consta
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
-define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 123, i8 addrspace(1)* %out.gep.1
@@ -35,7 +35,7 @@ define void @merge_global_store_2_consta
; GCN-LABEL: {{^}}merge_global_store_2_constants_i16:
; GCN: buffer_store_dword v
-define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 123, i16 addrspace(1)* %out.gep.1
@@ -45,7 +45,7 @@ define void @merge_global_store_2_consta
; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16:
; GCN: buffer_store_dword v
-define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 0, i16 addrspace(1)* %out.gep.1
@@ -57,7 +57,7 @@ define void @merge_global_store_2_consta
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
-define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 123, i16 addrspace(1)* %out.gep.1
@@ -69,7 +69,7 @@ define void @merge_global_store_2_consta
; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 123, i32 addrspace(1)* %out.gep.1
@@ -79,7 +79,7 @@ define void @merge_global_store_2_consta
; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32:
; GCN: buffer_store_dwordx2
-define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)*
store float 1.0, float addrspace(1)* %out.gep.1.bc
@@ -91,7 +91,7 @@ define void @merge_global_store_2_consta
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
store i32 123, i32 addrspace(1)* %out.gep.1.bc
@@ -105,7 +105,7 @@ define void @merge_global_store_2_consta
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
@@ -119,7 +119,7 @@ define void @merge_global_store_4_consta
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
; GCN: buffer_store_dwordx4
-define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
@@ -134,7 +134,7 @@ define void @merge_global_store_4_consta
; First store is out of order.
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
; GCN: buffer_store_dwordx4
-define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
@@ -149,7 +149,7 @@ define void @merge_global_store_4_consta
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
; GCN-AA: buffer_store_dwordx4 v
; GCN: s_endpgm
-define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
@@ -169,7 +169,7 @@ define void @merge_global_store_4_consta
; SI-DAG: buffer_store_dword
; SI-NOT: buffer_store_dword
; GCN: s_endpgm
-define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
@@ -181,7 +181,7 @@ define void @merge_global_store_3_consta
; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
; GCN: buffer_store_dwordx4
-define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
store i64 123, i64 addrspace(1)* %out.gep.1
@@ -192,7 +192,7 @@ define void @merge_global_store_2_consta
; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
-define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
%out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
%out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3
@@ -207,7 +207,7 @@ define void @merge_global_store_4_consta
; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32:
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[LOAD]]
-define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
@@ -222,7 +222,7 @@ define void @merge_global_store_2_adjace
; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
%in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
@@ -241,7 +241,7 @@ define void @merge_global_store_2_adjace
; GCN: buffer_load_dword v
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
-define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
@@ -256,7 +256,7 @@ define void @merge_global_store_2_adjace
; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[LOAD]]
-define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
@@ -283,7 +283,7 @@ define void @merge_global_store_4_adjace
; SI-DAG: buffer_store_dword v
; SI-DAG: buffer_store_dwordx2 v
; GCN: s_endpgm
-define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
%in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
@@ -302,7 +302,7 @@ define void @merge_global_store_3_adjace
; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[LOAD]]
-define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
@@ -325,7 +325,7 @@ define void @merge_global_store_4_adjace
; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
-define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
%in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
%in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13
@@ -351,7 +351,7 @@ define void @merge_global_store_4_adjace
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: s_barrier
; GCN: buffer_store_dwordx4 [[LOAD]]
-define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
@@ -388,7 +388,7 @@ define void @merge_global_store_4_adjace
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
-define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
@@ -416,7 +416,7 @@ define void @merge_global_store_4_adjace
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: buffer_store_dword [[LOAD]]
; GCN: s_endpgm
-define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
%out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
%out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
@@ -446,7 +446,7 @@ define void @merge_global_store_4_adjace
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
-define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
%out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
%out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
@@ -470,7 +470,7 @@ define void @merge_global_store_4_adjace
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[LOAD]]
; GCN: s_endpgm
-define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
%out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
@@ -492,7 +492,7 @@ define void @merge_global_store_4_vector
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: s_endpgm
-define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
+define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1
store i8 123, i8 addrspace(3)* %out.gep.1
@@ -504,7 +504,7 @@ define void @merge_local_store_2_constan
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
-define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
+define amdgpu_kernel void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
store i32 123, i32 addrspace(3)* %out.gep.1
@@ -522,7 +522,7 @@ define void @merge_local_store_2_constan
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1
; GCN: s_endpgm
-define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
+define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
%out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3
@@ -540,7 +540,7 @@ define void @merge_local_store_4_constan
; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
; GCN: buffer_store_dword v[[HI]]
-define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) {
store i32 9, i32 addrspace(1)* %out, align 4
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 12, i32 addrspace(1)* %idx1, align 4
@@ -556,7 +556,7 @@ define void @merge_global_store_5_consta
; GCN-LABEL: {{^}}merge_global_store_6_constants_i32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx2
-define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) {
store i32 13, i32 addrspace(1)* %out, align 4
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 15, i32 addrspace(1)* %idx1, align 4
@@ -575,7 +575,7 @@ define void @merge_global_store_6_consta
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx2
; GCN: buffer_store_dword v
-define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
store i32 34, i32 addrspace(1)* %out, align 4
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 999, i32 addrspace(1)* %idx1, align 4
@@ -596,7 +596,7 @@ define void @merge_global_store_7_consta
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
-define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
store i32 34, i32 addrspace(1)* %out, align 4
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 999, i32 addrspace(1)* %idx1, align 4
@@ -630,7 +630,7 @@ define void @merge_global_store_8_consta
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN: ScratchSize: 0{{$}}
-define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
%vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
ret void
@@ -646,7 +646,7 @@ define void @copy_v3i32_align4(<3 x i32>
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN: ScratchSize: 0{{$}}
-define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
%vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
ret void
@@ -662,7 +662,7 @@ define void @copy_v3i64_align4(<3 x i64>
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN: ScratchSize: 0{{$}}
-define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
%vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
%fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
store <3 x float> %fadd, <3 x float> addrspace(1)* %out
@@ -679,7 +679,7 @@ define void @copy_v3f32_align4(<3 x floa
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN: ScratchSize: 0{{$}}
-define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
%vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
%fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
store <3 x double> %fadd, <3 x double> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/min.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/min.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/min.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/min.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; GCN: v_min_i32_e32
; EG: MIN_INT
-define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
@@ -24,7 +24,7 @@ define void @v_test_imin_sle_i32(i32 add
; GCN: s_min_i32
; EG: MIN_INT
-define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%cmp = icmp sle i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -35,7 +35,7 @@ define void @s_test_imin_sle_i32(i32 add
; GCN: s_min_i32
; EG: MIN_INT
-define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
%cmp = icmp sle <1 x i32> %a, %b
%val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
store <1 x i32> %val, <1 x i32> addrspace(1)* %out
@@ -52,7 +52,7 @@ define void @s_test_imin_sle_v1i32(<1 x
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
-define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
%cmp = icmp sle <4 x i32> %a, %b
%val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %val, <4 x i32> addrspace(1)* %out
@@ -65,7 +65,7 @@ define void @s_test_imin_sle_v4i32(<4 x
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: s_min_i32
-define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) #0 {
%cmp = icmp sle i8 %a, %b
%val = select i1 %cmp, i8 %a, i8 %b
store i8 %val, i8 addrspace(1)* %out
@@ -106,7 +106,7 @@ define void @s_test_imin_sle_i8(i8 addrs
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
-define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) #0 {
%cmp = icmp sle <4 x i8> %a, %b
%val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
store <4 x i8> %val, <4 x i8> addrspace(1)* %out
@@ -124,7 +124,7 @@ define void @s_test_imin_sle_v4i8(<4 x i
; EG: MIN_INT
; EG: MIN_INT
-define void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
%cmp = icmp sle <2 x i16> %a, %b
%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
store <2 x i16> %val, <2 x i16> addrspace(1)* %out
@@ -150,7 +150,7 @@ define void @s_test_imin_sle_v2i16(<2 x
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
-define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
%cmp = icmp sle <4 x i16> %a, %b
%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
store <4 x i16> %val, <4 x i16> addrspace(1)* %out
@@ -161,7 +161,7 @@ define void @s_test_imin_sle_v4i16(<4 x
; GCN: v_min_i32_e32
; EG: MIN_INT
-define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -180,7 +180,7 @@ define void @v_test_imin_slt_i32(i32 add
; GFX89: v_min_i16_e32
; EG: MIN_INT
-define void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
@@ -198,7 +198,7 @@ define void @v_test_imin_slt_i16(i16 add
; GCN: s_min_i32
; EG: MIN_INT
-define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%cmp = icmp slt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -211,7 +211,7 @@ define void @s_test_imin_slt_i32(i32 add
; EG: MIN_INT
; EG: MIN_INT
-define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
%cmp = icmp slt <2 x i32> %a, %b
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
store <2 x i32> %val, <2 x i32> addrspace(1)* %out
@@ -222,7 +222,7 @@ define void @s_test_imin_slt_v2i32(<2 x
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
-define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
+define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
%cmp = icmp slt i32 %a, 8
%val = select i1 %cmp, i32 %a, i32 8
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -233,7 +233,7 @@ define void @s_test_imin_slt_imm_i32(i32
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
-define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
+define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
%cmp = icmp sle i32 %a, 8
%val = select i1 %cmp, i32 %a, i32 8
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -244,7 +244,7 @@ define void @s_test_imin_sle_imm_i32(i32
; GCN: v_min_u32_e32
; EG: MIN_UINT
-define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
@@ -267,7 +267,7 @@ define void @v_test_umin_ule_i32(i32 add
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
-define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
@@ -301,7 +301,7 @@ define void @v_test_umin_ule_v3i32(<3 x
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
-define void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
@@ -319,7 +319,7 @@ define void @v_test_umin_ule_v3i16(<3 x
; GCN: s_min_u32
; EG: MIN_UINT
-define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%cmp = icmp ule i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -330,7 +330,7 @@ define void @s_test_umin_ule_i32(i32 add
; GCN: v_min_u32_e32
; EG: MIN_UINT
-define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
@@ -353,7 +353,7 @@ define void @v_test_umin_ult_i32(i32 add
; GFX89: v_min_u16_e32
; EG: MIN_UINT
-define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
@@ -371,7 +371,7 @@ define void @v_test_umin_ult_i8(i8 addrs
; GCN: s_min_u32
; EG: MIN_UINT
-define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -386,7 +386,7 @@ define void @s_test_umin_ult_i32(i32 add
; GCN: s_endpgm
; EG-NOT: MIN_UINT
-define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%cmp = icmp ult i32 %a, %b
@@ -404,7 +404,7 @@ define void @v_test_umin_ult_i32_multi_u
; GCN: s_endpgm
; EG-NOT: MIN_UINT
-define void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
%a = load i16, i16 addrspace(1)* %aptr, align 2
%b = load i16, i16 addrspace(1)* %bptr, align 2
%cmp = icmp ult i16 %a, %b
@@ -419,7 +419,7 @@ define void @v_test_umin_ult_i16_multi_u
; GCN: s_min_u32
; EG: MIN_UINT
-define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
+define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
%cmp = icmp ult <1 x i32> %a, %b
%val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
store <1 x i32> %val, <1 x i32> addrspace(1)* %out
@@ -444,7 +444,7 @@ define void @s_test_umin_ult_v1i32(<1 x
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
-define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
+define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
%cmp = icmp ult <8 x i32> %a, %b
%val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %val, <8 x i32> addrspace(1)* %out
@@ -478,7 +478,7 @@ define void @s_test_umin_ult_v8i32(<8 x
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
-define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
+define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
%cmp = icmp ult <8 x i16> %a, %b
%val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
store <8 x i16> %val, <8 x i16> addrspace(1)* %out
@@ -494,7 +494,7 @@ define void @s_test_umin_ult_v8i16(<8 x
; GCN: buffer_store_dword [[VMIN]]
; EG: MIN_UINT
-define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #0 {
+define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #0 {
%a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32
%cmp = icmp ult i32 %a.ext, %b.ext
@@ -514,7 +514,7 @@ define void @simplify_demanded_bits_test
; GCN: buffer_store_dword [[VMIN]]
; EG: MIN_INT
-define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #0 {
+define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #0 {
%a.ext = sext i16 %a to i32
%b.ext = sext i16 %b to i32
%cmp = icmp slt i32 %a.ext, %b.ext
@@ -529,7 +529,7 @@ define void @simplify_demanded_bits_test
; GCN: s_min_i32
; EG: MIN_INT
-define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
+define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
%cmp = icmp sle i16 %a, %b
%val = select i1 %cmp, i16 %a, i16 %b
store i16 %val, i16 addrspace(1)* %out
@@ -542,7 +542,7 @@ define void @s_test_imin_sle_i16(i16 add
; EG: MIN_UINT
; EG: MIN_UINT
-define void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%tmp = icmp ult i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -554,7 +554,7 @@ define void @test_umin_ult_i64(i64 addrs
; EG: MIN_UINT
; EG: MIN_UINT
-define void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%tmp = icmp ule i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -566,7 +566,7 @@ define void @test_umin_ule_i64(i64 addrs
; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
-define void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%tmp = icmp slt i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -578,7 +578,7 @@ define void @test_imin_slt_i64(i64 addrs
; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
-define void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%tmp = icmp sle i64 %a, %b
%val = select i1 %tmp, i64 %a, i64 %b
store i64 %val, i64 addrspace(1)* %out, align 8
@@ -596,7 +596,7 @@ define void @test_imin_sle_i64(i64 addrs
; EG: MIN_INT
; EG: MIN_INT
-define void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
@@ -621,7 +621,7 @@ define void @v_test_imin_sle_v2i16(<2 x
; EG: MIN_UINT
; EG: MIN_UINT
-define void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/min3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/min3.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/min3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/min3.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; FUNC-LABEL: @v_test_imin3_slt_i32
; SI: v_min3_i32
-define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -23,7 +23,7 @@ define void @v_test_imin3_slt_i32(i32 ad
; FUNC-LABEL: @v_test_umin3_ult_i32
; SI: v_min3_u32
-define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -43,7 +43,7 @@ define void @v_test_umin3_ult_i32(i32 ad
; FUNC-LABEL: @v_test_umin_umin_umin
; SI: v_min_i32
; SI: v_min3_i32
-define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid2 = mul i32 %tid, 2
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
@@ -77,7 +77,7 @@ define void @v_test_umin_umin_umin(i32 a
; FUNC-LABEL: @v_test_umin3_2_uses
; SI-NOT: v_min3
-define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid2 = mul i32 %tid, 2
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/missing-store.ll Tue Mar 21 16:39:51 2017
@@ -15,7 +15,7 @@
; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
; SI: buffer_store_dword
; SI: s_endpgm
-define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+define amdgpu_kernel void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll Tue Mar 21 16:39:51 2017
@@ -19,7 +19,7 @@
; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]]
; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}},
-define void @clobber_vgpr_pair_pointer_add(i64 %arg1, i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 {
+define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 {
bb:
%tmp = icmp sgt i32 %arg3, 0
br i1 %tmp, label %bb4, label %bb17
Modified: llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}}
-define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
+define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
%ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
@@ -31,7 +31,7 @@ exit:
; GCN-LABEL: {{^}}atomic_max_i32_noret:
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}}
-define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
+define amdgpu_kernel void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
%ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; MUBUF load with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: {{^}}mubuf_load0:
; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
-define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
%1 = load i32, i32 addrspace(1)* %0
@@ -20,7 +20,7 @@ entry:
; MUBUF load with the largest possible immediate offset
; CHECK-LABEL: {{^}}mubuf_load1:
; CHECK: buffer_load_ubyte v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
-define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
%1 = load i8, i8 addrspace(1)* %0
@@ -32,7 +32,7 @@ entry:
; CHECK-LABEL: {{^}}mubuf_load2:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
-define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
%1 = load i32, i32 addrspace(1)* %0
@@ -44,7 +44,7 @@ entry:
; CHECK-LABEL: {{^}}mubuf_load3:
; CHECK-NOT: ADD
; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
-define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
+define amdgpu_kernel void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
%0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
%1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
@@ -91,7 +91,7 @@ main_body:
; MUBUF store with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: {{^}}mubuf_store0:
; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
-define void @mubuf_store0(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
store i32 0, i32 addrspace(1)* %0
@@ -102,7 +102,7 @@ entry:
; CHECK-LABEL: {{^}}mubuf_store1:
; CHECK: buffer_store_byte v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
-define void @mubuf_store1(i8 addrspace(1)* %out) {
+define amdgpu_kernel void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
%0 = getelementptr i8, i8 addrspace(1)* %out, i64 4095
store i8 0, i8 addrspace(1)* %0
@@ -113,7 +113,7 @@ entry:
; CHECK-LABEL: {{^}}mubuf_store2:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
-define void @mubuf_store2(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
store i32 0, i32 addrspace(1)* %0
@@ -124,7 +124,7 @@ entry:
; CHECK-LABEL: {{^}}mubuf_store3:
; CHECK-NOT: ADD
; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
-define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
+define amdgpu_kernel void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
%0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset
%1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
@@ -134,14 +134,14 @@ entry:
; CHECK-LABEL: {{^}}store_sgpr_ptr:
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0
-define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
store i32 99, i32 addrspace(1)* %out, align 4
ret void
}
; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
-define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
@@ -150,7 +150,7 @@ define void @store_sgpr_ptr_offset(i32 a
; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
-define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
@@ -159,7 +159,7 @@ define void @store_sgpr_ptr_large_offset
; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic:
; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
; CHECK: buffer_atomic_add v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
-define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
%gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
ret void
@@ -167,7 +167,7 @@ define void @store_sgpr_ptr_large_offset
; CHECK-LABEL: {{^}}store_vgpr_ptr:
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
store i32 99, i32 addrspace(1)* %out.gep, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mul.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mul.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mul.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1) * %in
%b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
@@ -31,7 +31,7 @@ define void @test_mul_v2i32(<2 x i32> ad
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
@@ -45,7 +45,7 @@ define void @v_mul_v4i32(<4 x i32> addrs
; SI: s_load_dword
; SI: s_mul_i32
; SI: buffer_store_dword
-define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%mul = mul i64 %b, %a
%trunc = trunc i64 %mul to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -57,7 +57,7 @@ define void @s_trunc_i64_mul_to_i32(i32
; SI: s_load_dword
; SI: v_mul_lo_i32
; SI: buffer_store_dword
-define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%b = load i64, i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %b, %a
@@ -73,7 +73,7 @@ define void @v_trunc_i64_mul_to_i32(i32
; EG-DAG: MULHI_INT
; SI-DAG: s_mul_i32
; SI-DAG: v_mul_hi_i32
-define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
entry:
%0 = sext i32 %in to i64
%1 = mul i64 %0, 80
@@ -87,7 +87,7 @@ entry:
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_mul_hi_i32
; SI: s_endpgm
-define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
%val = load i32, i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
%mul = mul i64 %ext, 80
@@ -99,7 +99,7 @@ define void @v_mul64_sext_c(i64 addrspac
; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9
; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9
; SI: s_endpgm
-define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
%val = load i32, i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
%mul = mul i64 %ext, 9
@@ -114,7 +114,7 @@ define void @v_mul64_sext_inline_imm(i64
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
-define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%mul = mul i32 %a, %b
store i32 %mul, i32 addrspace(1)* %out, align 4
ret void
@@ -122,7 +122,7 @@ define void @s_mul_i32(i32 addrspace(1)*
; FUNC-LABEL: {{^}}v_mul_i32:
; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@@ -139,7 +139,7 @@ define void @v_mul_i32(i32 addrspace(1)*
; crash with a 'failed to select' error.
; FUNC-LABEL: {{^}}s_mul_i64:
-define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%mul = mul i64 %a, %b
store i64 %mul, i64 addrspace(1)* %out, align 8
ret void
@@ -147,7 +147,7 @@ define void @s_mul_i64(i64 addrspace(1)*
; FUNC-LABEL: {{^}}v_mul_i64:
; SI: v_mul_lo_i32
-define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%b = load i64, i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %a, %b
@@ -157,7 +157,7 @@ define void @v_mul_i64(i64 addrspace(1)*
; FUNC-LABEL: {{^}}mul32_in_branch:
; SI: s_mul_i32
-define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
entry:
%0 = icmp eq i32 %a, 0
br i1 %0, label %if, label %else
@@ -180,7 +180,7 @@ endif:
; SI-DAG: s_mul_i32
; SI-DAG: v_mul_hi_u32
; SI: s_endpgm
-define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+define amdgpu_kernel void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
br i1 %0, label %if, label %else
@@ -224,7 +224,7 @@ endif:
; SI: s_mul_i32
; SI: buffer_store_dwordx4
-define void @s_mul_i128(i128 addrspace(1)* %out, i128 %a, i128 %b) nounwind #0 {
+define amdgpu_kernel void @s_mul_i128(i128 addrspace(1)* %out, i128 %a, i128 %b) nounwind #0 {
%mul = mul i128 %a, %b
store i128 %mul, i128 addrspace(1)* %out
ret void
@@ -253,7 +253,7 @@ define void @s_mul_i128(i128 addrspace(1
; SI-DAG: v_mul_lo_i32
; SI: {{buffer|flat}}_store_dwordx4
-define void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid
%gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/mul_int24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mul_int24.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mul_int24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mul_int24.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; Make sure we are not masking the inputs
; CM-NOT: AND
; CM: MUL_INT24
-define void @test_smul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%a.shl = shl i32 %a, 8
%a.24 = ashr i32 %a.shl, 8
@@ -39,7 +39,7 @@ entry:
; CM: MULHI_INT24
; CM: MULHI_INT24
; CM: MULHI_INT24
-define void @test_smulhi24_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smulhi24_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%a.shl = shl i32 %a, 8
%a.24 = ashr i32 %a.shl, 8
@@ -70,7 +70,7 @@ entry:
; GCN-DAG: v_mul_i32_i24_e32
; GCN: buffer_store_dwordx2
-define void @test_smul24_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smul24_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%shl.i = shl i32 %a, 8
%shr.i = ashr i32 %shl.i, 8
%conv.i = sext i32 %shr.i to i64
@@ -87,7 +87,7 @@ define void @test_smul24_i64(i64 addrspa
; GCN-DAG: v_mul_hi_i32_i24_e64 v{{[0-9]+}}, [[A]], [[A]]
; GCN-DAG: v_mul_i32_i24_e64 v{{[0-9]+}}, [[A]], [[A]]
; GCN: buffer_store_dwordx2
-define void @test_smul24_i64_square(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smul24_i64_square(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%shl.i = shl i32 %a, 8
%shr.i = ashr i32 %shl.i, 8
%conv.i = sext i32 %shr.i to i64
@@ -112,7 +112,7 @@ define void @test_smul24_i64_square(i64
; VI: v_ashrrev_i64 v{{\[[0-9]+:[0-9]+\]}}, 31, v{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2
-define void @test_smul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) #0 {
+define amdgpu_kernel void @test_smul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) #0 {
entry:
%a.shl = shl i33 %a, 9
%a.24 = ashr i33 %a.shl, 9
@@ -133,7 +133,7 @@ entry:
; SI: v_mul_hi_i32_i24_e32 v[[MUL_HI:[0-9]+]],
; SI-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; SI-NEXT: buffer_store_dword v[[HI]]
-define void @test_smulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
+define amdgpu_kernel void @test_smulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
entry:
%tmp0 = shl i33 %a, 9
%a_24 = ashr i33 %tmp0, 9
@@ -151,7 +151,7 @@ entry:
; GCN: v_mul_i32_i24_e32 v[[VAL_LO:[0-9]+]]
; GCN: v_mov_b32_e32 v[[VAL_HI:[0-9]+]], v[[VAL_LO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
-define void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32 %arg0, <2 x i32> %arg1, <2 x i32> %arg2) {
+define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32 %arg0, <2 x i32> %arg1, <2 x i32> %arg2) {
bb:
%cmp = icmp eq i32 %arg0, 0
br i1 %cmp, label %bb11, label %bb7
Modified: llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.y()
; FUNC-LABEL: {{^}}test_umul24_i32:
; GCN: v_mul_u32_u24
-define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
%a_24 = lshr i32 %0, 8
@@ -22,7 +22,7 @@ entry:
; SI: v_bfe_i32 v{{[0-9]}}, [[VI_MUL]], 0, 16
; VI: s_mul_i32 [[SI_MUL:s[0-9]]], s{{[0-9]}}, s{{[0-9]}}
; VI: s_sext_i32_i16 s{{[0-9]}}, [[SI_MUL]]
-define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%mul = mul i16 %a, %b
%ext = sext i16 %mul to i32
@@ -34,7 +34,7 @@ entry:
; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; VI: v_mul_lo_u16_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
-define void @test_umul24_i16_vgpr_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @test_umul24_i16_vgpr_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
%ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x
@@ -54,7 +54,7 @@ define void @test_umul24_i16_vgpr_sext(i
; VI: s_mul_i32
; VI: s_and_b32
; VI: v_mov_b32_e32
-define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+define amdgpu_kernel void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%mul = mul i16 %a, %b
%ext = zext i16 %mul to i32
@@ -66,7 +66,7 @@ entry:
; SI: v_mul_u32_u24_e32
; SI: v_and_b32_e32
; VI: v_mul_lo_u16
-define void @test_umul24_i16_vgpr(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @test_umul24_i16_vgpr(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
%ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x
@@ -83,7 +83,7 @@ define void @test_umul24_i16_vgpr(i32 ad
; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; VI: v_mul_lo_u16_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
-define void @test_umul24_i8_vgpr(i32 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b) {
+define amdgpu_kernel void @test_umul24_i8_vgpr(i32 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b) {
entry:
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
@@ -101,7 +101,7 @@ entry:
; GCN-NOT: and
; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
; GCN-NEXT: buffer_store_dword [[RESULT]]
-define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%a.24 = and i32 %a, 16777215
%b.24 = and i32 %b, 16777215
@@ -118,7 +118,7 @@ entry:
; GCN-NOT: and
; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
; GCN-NEXT: buffer_store_dword [[RESULT]]
-define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%a.24 = and i64 %a, 16777215
%b.24 = and i64 %b, 16777215
@@ -136,7 +136,7 @@ entry:
; GCN-DAG: v_mul_u32_u24_e32
; GCN-DAG: v_mul_hi_u32_u24_e32
; GCN: buffer_store_dwordx2
-define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%tmp0 = shl i64 %a, 40
%a_24 = lshr i64 %tmp0, 40
@@ -152,7 +152,7 @@ entry:
; GCN-NOT: s_and_b32
; GCN-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[A]], [[A]]
; GCN-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[A]], [[A]]
-define void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) {
entry:
%tmp0 = shl i64 %a, 40
%a.24 = lshr i64 %tmp0, 40
@@ -166,7 +166,7 @@ entry:
; GCN: s_and_b32
; GCN: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]]
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]]
-define void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%a.16 = and i32 %a, 65535
%b.16 = and i32 %b, 65535
@@ -186,7 +186,7 @@ entry:
; GCN-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
; GCN-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}}
-define void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) {
+define amdgpu_kernel void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) {
entry:
%tmp0 = shl i33 %a, 9
%a_24 = lshr i33 %tmp0, 9
@@ -206,7 +206,7 @@ entry:
; GCN: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
; GCN-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; GCN-NEXT: buffer_store_dword v[[HI]]
-define void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
+define amdgpu_kernel void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
entry:
%tmp0 = shl i33 %a, 9
%a_24 = lshr i33 %tmp0, 9
Modified: llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-r600.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-r600.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-r600.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}test_umul24_i32:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
%a_24 = lshr i32 %0, 8
@@ -19,7 +19,7 @@ entry:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
; EG: 16
-define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%mul = mul i16 %a, %b
%ext = sext i16 %mul to i32
@@ -31,7 +31,7 @@ entry:
; FUNC-LABEL: {{^}}test_umul24_i8:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
+define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
%mul = mul i8 %a, %b
%ext = sext i8 %mul to i32
@@ -41,7 +41,7 @@ entry:
; FUNC-LABEL: {{^}}test_umulhi24_i32_i64:
; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%a.24 = and i32 %a, 16777215
%b.24 = and i32 %b, 16777215
@@ -56,7 +56,7 @@ entry:
; FUNC-LABEL: {{^}}test_umulhi24:
; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%a.24 = and i64 %a, 16777215
%b.24 = and i64 %b, 16777215
@@ -71,7 +71,7 @@ entry:
; FUNC-LABEL: {{^}}test_umul24_i64:
; EG: MUL_UINT24
; EG: MULHI
-define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%tmp0 = shl i64 %a, 40
%a_24 = lshr i64 %tmp0, 40
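
The EG lines in this file match R600 output, where kernel arguments are read out of the KC0 constant registers; that is the kernel-argument lowering these checks exercise. The file's RUN line is outside this hunk; an R600 codegen test of this kind is typically driven by something along these lines (assumed, check the actual file header for the real invocation):

  ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
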
Modified: llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll Tue Mar 21 16:39:51 2017
@@ -64,7 +64,7 @@ ENDIF:
br i1 %tmp51, label %LOOP, label %LOOP.outer
}
-; OPT-LABEL: define void @multi_if_break_loop(
+; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop(
; OPT: llvm.amdgcn.break
; OPT: llvm.amdgcn.loop
; OPT: llvm.amdgcn.if.break
@@ -79,7 +79,7 @@ ENDIF:
; Uses a copy instead of an or
; GCN: s_mov_b64 [[COPY:s\[[0-9]+:[0-9]+\]]], [[BREAK_REG]]
; GCN: s_or_b64 [[BREAK_REG]], exec, [[COPY]]
-define void @multi_if_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%tmp = sub i32 %id, %arg
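
Worth noting in multilevel-break.ll: the OPT-LABEL string changes along with the define, because the IR-level half of this test runs an opt pipeline (its RUN line is outside this hunk) and FileCheck matches the literal define line in that output. Any test that spells the full define out in a check pattern needs both edits, roughly of this shape (contrived sketch, not from the patch):

  ; OPT-LABEL: define amdgpu_kernel void @example(
  define amdgpu_kernel void @example(i32 %arg) {
    ret void
  }
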
Modified: llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
; CHECK-DAG: Name: load_extern_const_init
-define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
%val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
@@ -19,7 +19,7 @@ define void @load_extern_const_init(i32
@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
; CHECK-DAG: Name: undef_const_addrspace
-define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
%val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/no-shrink-extloads.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
; SI: s_load_dword s
; SI: buffer_store_short v
-define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
+define amdgpu_kernel void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
%trunc = trunc i32 %arg to i16
store i16 %trunc, i16 addrspace(1)* %out
ret void
@@ -21,7 +21,7 @@ define void @truncate_kernarg_i32_to_i16
; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
; SI: buffer_load_dword v
; SI: buffer_store_short v
-define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
@@ -34,7 +34,7 @@ define void @truncate_buffer_load_i32_to
; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
-define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
+define amdgpu_kernel void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
%trunc = trunc i32 %arg to i8
store i8 %trunc, i8 addrspace(1)* %out
ret void
@@ -43,7 +43,7 @@ define void @truncate_kernarg_i32_to_i8(
; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
-define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
@@ -56,7 +56,7 @@ define void @truncate_buffer_load_i32_to
; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
; SI: s_load_dword s
; SI: buffer_store_byte v
-define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
+define amdgpu_kernel void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
%trunc = trunc i32 %arg to i1
store i1 %trunc, i1 addrspace(1)* %out
ret void
@@ -65,7 +65,7 @@ define void @truncate_kernarg_i32_to_i1(
; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
-define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
@@ -78,7 +78,7 @@ define void @truncate_buffer_load_i32_to
; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
; SI: s_load_dword s
; SI: buffer_store_dword v
-define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
+define amdgpu_kernel void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
%trunc = trunc i64 %arg to i32
store i32 %trunc, i32 addrspace(1)* %out
ret void
@@ -87,7 +87,7 @@ define void @truncate_kernarg_i64_to_i32
; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
; SI: buffer_load_dword v
; SI: buffer_store_dword v
-define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -100,7 +100,7 @@ define void @truncate_buffer_load_i64_to
; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
; SI: s_load_dword s
; SI: buffer_store_dword v
-define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
+define amdgpu_kernel void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
%srl = lshr i64 %arg, 32
%trunc = trunc i64 %srl to i32
store i32 %trunc, i32 addrspace(1)* %out
@@ -110,7 +110,7 @@ define void @srl_kernarg_i64_to_i32(i32
; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
; SI: buffer_load_dword v
; SI: buffer_store_dword v
-define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -125,7 +125,7 @@ define void @srl_buffer_load_i64_to_i32(
; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
-define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
+define amdgpu_kernel void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
%trunc = trunc i16 %arg to i8
store i8 %trunc, i8 addrspace(1)* %out
ret void
@@ -134,7 +134,7 @@ define void @truncate_kernarg_i16_to_i8(
; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
; SI: buffer_load_ubyte v
; SI: buffer_store_byte v
-define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
@@ -147,7 +147,7 @@ define void @truncate_buffer_load_i16_to
; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
-define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
+define amdgpu_kernel void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
%srl = lshr i64 %arg, 32
%trunc = trunc i64 %srl to i8
store i8 %trunc, i8 addrspace(1)* %out
@@ -157,7 +157,7 @@ define void @srl_kernarg_i64_to_i8(i8 ad
; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
-define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
@@ -171,7 +171,7 @@ define void @srl_buffer_load_i64_to_i8(i
; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
; SI: s_load_dword s
; SI: buffer_store_byte v
-define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
+define amdgpu_kernel void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
%trunc = trunc i64 %arg to i8
store i8 %trunc, i8 addrspace(1)* %out
ret void
@@ -180,7 +180,7 @@ define void @truncate_kernarg_i64_to_i8(
; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
; SI: buffer_load_dword v
; SI: buffer_store_byte v
-define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
@@ -194,7 +194,7 @@ define void @truncate_buffer_load_i64_to
; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
; SI: s_waitcnt lgkmcnt(0)
; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
-define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+define amdgpu_kernel void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%val = load i32, i32 addrspace(2)* %in
%mask = and i32 %val, 65535
@@ -205,7 +205,7 @@ entry:
; FUNC-LABEL: {{^}}extract_hi_i64_bitcast_v2i32:
; SI: buffer_load_dword v
; SI: buffer_store_dword v
-define void @extract_hi_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @extract_hi_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
%ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
%bc = bitcast <2 x i32> %ld to i64
%hi = lshr i64 %bc, 32
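
All of the no-shrink-extloads.ll cases share one shape: the value is loaded (or pulled from the kernarg segment) at its full width, and the truncation is folded into the narrower store rather than shrinking the load itself. A condensed sketch of that shape, not taken from the patch:

  ; Sketch only, without the workitem-id addressing the real tests use.
  define amdgpu_kernel void @trunc_store_sketch(i16 addrspace(1)* %out, i32 addrspace(1)* %in) {
    %full = load i32, i32 addrspace(1)* %in     ; expected: buffer_load_dword
    %narrow = trunc i32 %full to i16
    store i16 %narrow, i16 addrspace(1)* %out   ; expected: buffer_store_short
    ret void
  }
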
Modified: llvm/trunk/test/CodeGen/AMDGPU/opencl-image-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/opencl-image-metadata.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/opencl-image-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/opencl-image-metadata.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; EG: CF_END
; SI: s_endpgm
-define void @kernel(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll Tue Mar 21 16:39:51 2017
@@ -2,7 +2,7 @@
; CHECK-LABEL: {{^}}fold_sgpr:
; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
-define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+define amdgpu_kernel void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
entry:
%tmp0 = icmp ne i32 %fold, 0
br i1 %tmp0, label %if, label %endif
@@ -20,7 +20,7 @@ endif:
; CHECK-LABEL: {{^}}fold_imm:
; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
-define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+define amdgpu_kernel void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
entry:
%fold = add i32 3, 2
%tmp0 = icmp ne i32 %cmp, 0
@@ -46,7 +46,7 @@ endif:
; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
-define void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
+define amdgpu_kernel void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
entry:
%tmp0 = add i64 %val, 1
store i64 %tmp0, i64 addrspace(1)* %out
@@ -61,7 +61,7 @@ entry:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-define void @vector_inline(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_inline(<4 x i32> addrspace(1)* %out) {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp0, 1
@@ -80,7 +80,7 @@ entry:
; CHECK-LABEL: {{^}}imm_one_use:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
-define void @imm_one_use(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @imm_one_use(i32 addrspace(1)* %out) {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = xor i32 %tmp0, 100
@@ -94,7 +94,7 @@ entry:
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
-define void @vector_imm(<4 x i32> addrspace(1)* %out) {
+define amdgpu_kernel void @vector_imm(<4 x i32> addrspace(1)* %out) {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp0, 1
@@ -114,7 +114,7 @@ entry:
; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
; CHECK: buffer_store_dword v[[LO]]
-define void @no_fold_tied_subregister() {
+define amdgpu_kernel void @no_fold_tied_subregister() {
%tmp1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
%tmp2 = extractelement <2 x float> %tmp1, i32 0
%tmp3 = extractelement <2 x float> %tmp1, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/operand-spacing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/operand-spacing.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/operand-spacing.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/operand-spacing.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
; GCN: buffer_store_dword [[RESULT]],
-define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
+define amdgpu_kernel void @add_f32(float addrspace(1)* %out, float %a, float %b) {
%result = fadd float %a, %b
store float %result, float addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
--- |
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
- define void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 {
main_body:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%cc = icmp eq i32 %id, 0
@@ -23,7 +23,7 @@
ret void
}
- define void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -34,7 +34,7 @@
ret void
}
- define void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -46,7 +46,7 @@
}
- define void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 {
main_body:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%cc = icmp eq i32 %id, 0
@@ -67,7 +67,7 @@
ret void
}
- define void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -78,7 +78,7 @@
ret void
}
- define void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -89,7 +89,7 @@
ret void
}
- define void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -100,7 +100,7 @@
ret void
}
- define void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -111,7 +111,7 @@
ret void
}
- define void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
@@ -122,7 +122,7 @@
ret void
}
- define void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 {
+ define amdgpu_kernel void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 {
main_body:
br i1 undef, label %if, label %end
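
A note on the .mir file above: MIR tests carry a stub IR module in their leading '--- |' block, which is why plain IR define lines appear here and need the calling-convention update even though the interesting checks operate on machine IR. The overall shape of such a test looks roughly like this (assumed skeleton; the real RUN line and pass name live in the file header, which this hunk does not show):

  # Assumed skeleton of a MIR test, not taken from this patch.
  # RUN: llc -march=amdgcn -run-pass si-optimize-exec-masking -o - %s | FileCheck %s
  --- |
    define amdgpu_kernel void @example() {
      ret void
    }
  ...
  ---
  name:            example
  body:             |
    bb.0:
      S_ENDPGM
  ...
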
Modified: llvm/trunk/test/CodeGen/AMDGPU/or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/or.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/or.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/or.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1) * %in
%b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
@@ -28,7 +28,7 @@ define void @or_v2i32(<2 x i32> addrspac
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
@@ -39,7 +39,7 @@ define void @or_v4i32(<4 x i32> addrspac
; FUNC-LABEL: {{^}}scalar_or_i32:
; SI: s_or_b32
-define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
store i32 %or, i32 addrspace(1)* %out
ret void
@@ -47,7 +47,7 @@ define void @scalar_or_i32(i32 addrspace
; FUNC-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
-define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
+define amdgpu_kernel void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
%loada = load i32, i32 addrspace(1)* %a
%or = or i32 %loada, %b
store i32 %or, i32 addrspace(1)* %out
@@ -56,7 +56,7 @@ define void @vector_or_i32(i32 addrspace
; FUNC-LABEL: {{^}}scalar_or_literal_i32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
-define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
%or = or i32 %a, 99999
store i32 %or, i32 addrspace(1)* %out, align 4
ret void
@@ -68,7 +68,7 @@ define void @scalar_or_literal_i32(i32 a
; SI-DAG: s_or_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
-define void @scalar_or_literal_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @scalar_or_literal_i64(i64 addrspace(1)* %out, i64 %a) {
%or = or i64 %a, 4261135838621753
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -82,7 +82,7 @@ define void @scalar_or_literal_i64(i64 a
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
-define void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%or = or i64 %a, 4261135838621753
store i64 %or, i64 addrspace(1)* %out
@@ -101,7 +101,7 @@ define void @scalar_or_literal_multi_use
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
; SI-NOT: or_b32
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
%or = or i64 %a, 63
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -111,7 +111,7 @@ define void @scalar_or_inline_imm_i64(i6
; SI-NOT: or_b32
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 63
; SI-NOT: or_b32
-define void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%or = or i64 %a, 63
store i64 %or, i64 addrspace(1)* %out
%foo = add i64 %b, 63
@@ -125,7 +125,7 @@ define void @scalar_or_inline_imm_multi_
; SI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], -1{{$}}
; SI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[VAL]]
; SI: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
-define void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
%or = or i64 %a, -8
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -133,7 +133,7 @@ define void @scalar_or_neg_inline_imm_i6
; FUNC-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
-define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
%loada = load i32, i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 65535
store i32 %or, i32 addrspace(1)* %out, align 4
@@ -142,7 +142,7 @@ define void @vector_or_literal_i32(i32 a
; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
-define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
%loada = load i32, i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 4
store i32 %or, i32 addrspace(1)* %out, align 4
@@ -154,7 +154,7 @@ define void @vector_or_inline_immediate_
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI: s_or_b64
-define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%or = or i64 %a, %b
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -163,7 +163,7 @@ define void @scalar_or_i64(i64 addrspace
; FUNC-LABEL: {{^}}vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
-define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
%loadb = load i64, i64 addrspace(1)* %b, align 8
%or = or i64 %loada, %loadb
@@ -174,7 +174,7 @@ define void @vector_or_i64(i64 addrspace
; FUNC-LABEL: {{^}}scalar_vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
-define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
+define amdgpu_kernel void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
%loada = load i64, i64 addrspace(1)* %a
%or = or i64 %loada, %b
store i64 %or, i64 addrspace(1)* %out
@@ -186,7 +186,7 @@ define void @scalar_vector_or_i64(i64 ad
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
-define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 22470723082367
store i64 %or, i64 addrspace(1)* %out
@@ -200,7 +200,7 @@ define void @vector_or_i64_loadimm(i64 a
; SI-NOT: v_or_b32_e32 {{v[0-9]+}}, 0
; SI: buffer_store_dwordx2 v{{\[}}[[LO_RESULT]]:[[HI_VREG]]{{\]}}
; SI: s_endpgm
-define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 8
store i64 %or, i64 addrspace(1)* %out
@@ -213,7 +213,7 @@ define void @vector_or_i64_imm(i64 addrs
; SI-DAG: v_mov_b32_e32 v[[RES_HI:[0-9]+]], -1{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[RES_LO]]:[[RES_HI]]{{\]}}
; SI: s_endpgm
-define void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, -8
store i64 %or, i64 addrspace(1)* %out
@@ -226,7 +226,7 @@ define void @vector_or_i64_neg_inline_im
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffffff38, v[[LO_VREG]]
; SI: buffer_store_dwordx2
; SI: s_endpgm
-define void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, -200
store i64 %or, i64 addrspace(1)* %out
@@ -239,7 +239,7 @@ define void @vector_or_i64_neg_literal(i
; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
-define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = or i64 %b, %a
%trunc = trunc i64 %add to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -250,7 +250,7 @@ define void @trunc_i64_or_to_i32(i32 add
; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define amdgpu_kernel void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float, float addrspace(1)* %in0
%b = load float, float addrspace(1)* %in1
%acmp = fcmp oge float %a, 0.000000e+00
@@ -263,7 +263,7 @@ define void @or_i1(i32 addrspace(1)* %ou
; FUNC-LABEL: {{^}}s_or_i1:
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
-define void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
%cmp0 = icmp eq i32 %a, %b
%cmp1 = icmp eq i32 %c, %d
%or = or i1 %cmp0, %cmp1
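
The or.ll hunks cover the usual scalar/vector split: when both operands are kernel arguments the or stays on the scalar unit (s_or_b32, or s_or_b64 for i64), while an operand loaded from global memory forces the VALU form (v_or_b32_e32), and a 64-bit VALU or is split into two 32-bit ops. Small constants such as 63 or -8 fold as inline operands; wider ones like 0x1869f or 0xffff are emitted as literals. A compact illustration of the split, not from the patch:

  ; Sketch only. %a and %b arrive as SGPR kernel arguments; the value
  ; loaded through %p lives in a VGPR.
  define amdgpu_kernel void @or_scalar_vs_vector(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 addrspace(1)* %p) {
    %s = or i32 %a, %b                          ; expected: s_or_b32
    %v0 = load i32, i32 addrspace(1)* %p
    %v = or i32 %v0, %a                         ; expected: v_or_b32_e32
    %sum = add i32 %s, %v
    store i32 %sum, i32 addrspace(1)* %out
    ret void
  }
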
Modified: llvm/trunk/test/CodeGen/AMDGPU/over-max-lds-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/over-max-lds-size.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/over-max-lds-size.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/over-max-lds-size.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
@huge = internal unnamed_addr addrspace(3) global [100000 x i32] undef, align 4
-define void @use_huge_lds() {
+define amdgpu_kernel void @use_huge_lds() {
entry:
%v0 = getelementptr inbounds [100000 x i32], [100000 x i32] addrspace(3)* @huge, i32 0, i32 0
store i32 0, i32 addrspace(3)* %v0
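
For scale on over-max-lds-size.ll:

  [100000 x i32] = 100000 * 4 bytes = 400,000 bytes of LDS, vs. the 64 KiB (65,536 bytes) a GCN compute unit has.

So the point of this test is the over-limit diagnostic rather than the generated code; its CHECK lines sit outside this hunk.
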
Modified: llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pack.v2f16.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
-define void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
+define amdgpu_kernel void @s_pack_v2f16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
%val0 = load volatile i32, i32 addrspace(2)* %in0
%val1 = load volatile i32, i32 addrspace(2)* %in1
%lo.i = trunc i32 %val0 to i16
@@ -28,7 +28,7 @@ define void @s_pack_v2f16(i32 addrspace(
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1234, [[VAL1]]
; GFX9: ; use [[PACKED]]
-define void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
+define amdgpu_kernel void @s_pack_v2f16_imm_lo(i32 addrspace(2)* %in1) #0 {
%val1 = load i32, i32 addrspace(2)* %in1
%hi.i = trunc i32 %val1 to i16
%hi = bitcast i16 %hi.i to half
@@ -44,7 +44,7 @@ define void @s_pack_v2f16_imm_lo(i32 add
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1234
; GFX9: ; use [[PACKED]]
-define void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
+define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 {
%val0 = load i32, i32 addrspace(2)* %in0
%lo.i = trunc i32 %val0 to i16
%lo = bitcast i16 %lo.i to half
@@ -64,7 +64,7 @@ define void @s_pack_v2f16_imm_hi(i32 add
; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -91,7 +91,7 @@ define void @v_pack_v2f16(i32 addrspace(
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]
; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
-define void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -118,7 +118,7 @@ define void @v_pack_v2f16_user(i32 addrs
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
@@ -140,7 +140,7 @@ define void @v_pack_v2f16_imm_lo(i32 add
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
@@ -164,7 +164,7 @@ define void @v_pack_v2f16_inline_imm_lo(
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 {
+define amdgpu_kernel void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -187,7 +187,7 @@ define void @v_pack_v2f16_imm_hi(i32 add
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) #0 {
+define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -209,7 +209,7 @@ define void @v_pack_v2f16_inline_f16imm_
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2f16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
+define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
Modified: llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pack.v2i16.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], [[VAL1]]
; GFX9: ; use [[PACKED]]
-define void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
+define amdgpu_kernel void @s_pack_v2i16(i32 addrspace(2)* %in0, i32 addrspace(2)* %in1) #0 {
%val0 = load volatile i32, i32 addrspace(2)* %in0
%val1 = load volatile i32, i32 addrspace(2)* %in1
%lo = trunc i32 %val0 to i16
@@ -26,7 +26,7 @@ define void @s_pack_v2i16(i32 addrspace(
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], 0x1c8, [[VAL1]]
; GFX9: ; use [[PACKED]]
-define void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
+define amdgpu_kernel void @s_pack_v2i16_imm_lo(i32 addrspace(2)* %in1) #0 {
%val1 = load i32, i32 addrspace(2)* %in1
%hi = trunc i32 %val1 to i16
%vec.0 = insertelement <2 x i16> undef, i16 456, i32 0
@@ -41,7 +41,7 @@ define void @s_pack_v2i16_imm_lo(i32 add
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_pack_ll_b32_b16 [[PACKED:s[0-9]+]], [[VAL0]], 0x1c8
; GFX9: ; use [[PACKED]]
-define void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 {
+define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 {
%val0 = load i32, i32 addrspace(2)* %in0
%lo = trunc i32 %val0 to i16
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
@@ -60,7 +60,7 @@ define void @s_pack_v2i16_imm_hi(i32 add
; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -85,7 +85,7 @@ define void @v_pack_v2i16(i32 addrspace(
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]]
; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
-define void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -111,7 +111,7 @@ define void @v_pack_v2i16_user(i32 addrs
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2i16_imm_lo(i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16_imm_lo(i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
@@ -130,7 +130,7 @@ define void @v_pack_v2i16_imm_lo(i32 add
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, 64
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in1.gep = getelementptr inbounds i32, i32 addrspace(1)* %in1, i64 %tid.ext
@@ -151,7 +151,7 @@ define void @v_pack_v2i16_inline_imm_lo(
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 {
+define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
@@ -169,7 +169,7 @@ define void @v_pack_v2i16_imm_hi(i32 add
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 7
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]]
; GFX9: ; use [[PACKED]]
-define void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
+define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in0.gep = getelementptr inbounds i32, i32 addrspace(1)* %in0, i64 %tid.ext
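
pack.v2f16.ll and pack.v2i16.ll are both built around the same packing idiom: two 16-bit halves are assembled into a <2 x i16> (or <2 x half>) via insertelement, and the vector is reinterpreted as a single i32, which GFX9 can form with s_pack_ll_b32_b16 when both halves are scalar, or with v_and_b32 plus v_lshl_or_b32 when they live in VGPRs. A stripped-down sketch of the i16 case, not from the patch:

  ; Sketch only, without the volatile loads and fake uses of the real tests.
  define amdgpu_kernel void @pack_sketch(i32 addrspace(1)* %out, i32 %x, i32 %y) {
    %lo = trunc i32 %x to i16
    %hi = trunc i32 %y to i16
    %vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
    %vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
    %packed = bitcast <2 x i16> %vec.1 to i32
    store i32 %packed, i32 addrspace(1)* %out
    ret void
  }
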
Modified: llvm/trunk/test/CodeGen/AMDGPU/packetizer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/packetizer.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/packetizer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/packetizer.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z
; CHECK: BIT_ALIGN_INT * T{{[0-9]}}.W
-define void @test(i32 addrspace(1)* %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
+define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
entry:
%shl = sub i32 32, %e
%x = add i32 %x_arg, 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; to do its transformation, however now that we are using local memory for
; allocas, the transformation isn't happening.
-define void @_Z9chk1D_512v() #0 {
+define amdgpu_kernel void @_Z9chk1D_512v() #0 {
entry:
%a0 = alloca i32, align 4
%b0 = alloca i32, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/parallelorifcollapse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/parallelorifcollapse.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/parallelorifcollapse.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/parallelorifcollapse.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; CHECK: OR_INT
; CHECK-NEXT: OR_INT
; CHECK-NEXT: OR_INT
-define void @_Z9chk1D_512v() #0 {
+define amdgpu_kernel void @_Z9chk1D_512v() #0 {
entry:
%a0 = alloca i32, align 4
%b0 = alloca i32, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #1
-define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
Modified: llvm/trunk/test/CodeGen/AMDGPU/predicates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/predicates.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/predicates.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/predicates.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; CHECK-LABEL: {{^}}simple_if:
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @simple_if(i32 addrspace(1)* %out, i32 %in) {
entry:
%cmp0 = icmp sgt i32 %in, 0
br i1 %cmp0, label %IF, label %ENDIF
@@ -25,7 +25,7 @@ ENDIF:
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
br i1 %0, label %IF, label %ELSE
@@ -51,7 +51,7 @@ ENDIF:
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @nested_if(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
br i1 %0, label %IF0, label %ENDIF
@@ -79,7 +79,7 @@ ENDIF:
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
-define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
br i1 %0, label %IF0, label %ENDIF
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-access-no-objects.ll Tue Mar 21 16:39:51 2017
@@ -18,7 +18,7 @@
; OPTNONE-NOT: s_mov_b32
; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s7 offen{{$}}
-define void @store_to_undef() #0 {
+define amdgpu_kernel void @store_to_undef() #0 {
store volatile i32 0, i32* undef
ret void
}
@@ -28,7 +28,7 @@ define void @store_to_undef() #0 {
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
-define void @store_to_inttoptr() #0 {
+define amdgpu_kernel void @store_to_inttoptr() #0 {
store volatile i32 0, i32* inttoptr (i32 123 to i32*)
ret void
}
@@ -38,7 +38,7 @@ define void @store_to_inttoptr() #0 {
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
-define void @load_from_undef() #0 {
+define amdgpu_kernel void @load_from_undef() #0 {
%ld = load volatile i32, i32* undef
ret void
}
@@ -48,7 +48,7 @@ define void @load_from_undef() #0 {
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
-define void @load_from_inttoptr() #0 {
+define amdgpu_kernel void @load_from_inttoptr() #0 {
%ld = load volatile i32, i32* inttoptr (i32 123 to i32*)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll Tue Mar 21 16:39:51 2017
@@ -36,7 +36,7 @@
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
-define void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
+define amdgpu_kernel void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
@@ -106,7 +106,7 @@ entry:
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
-define void @private_elt_size_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
+define amdgpu_kernel void @private_elt_size_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
@@ -143,7 +143,7 @@ entry:
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
-define void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
+define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
@@ -179,7 +179,7 @@ entry:
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
-define void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
+define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
@@ -228,7 +228,7 @@ entry:
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
-define void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
+define amdgpu_kernel void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-memory-atomics.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
; This works because promote allocas pass replaces these with LDS atomics.
; Private atomics have no real use, but at least shouldn't crash on it.
-define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+define amdgpu_kernel void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
@@ -17,7 +17,7 @@ entry:
ret void
}
-define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+define amdgpu_kernel void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-memory-broken.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-memory-broken.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-memory-broken.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-memory-broken.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
declare i32 @foo(i32*) nounwind
-define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
%tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/private-memory-r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-memory-r600.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/private-memory-r600.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/private-memory-r600.ll Tue Mar 21 16:39:51 2017
@@ -16,7 +16,7 @@ declare i32 @llvm.r600.read.tidig.x() no
; OPT: call i32 @llvm.r600.read.tidig.y(), !range !0
; OPT: call i32 @llvm.r600.read.tidig.z(), !range !0
-define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
%stack = alloca [5 x i32], align 4
%0 = load i32, i32 addrspace(1)* %in, align 4
@@ -47,7 +47,7 @@ entry:
; R600-NOT: MOVA_INT
%struct.point = type { i32, i32 }
-define void @multiple_structs(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
entry:
%a = alloca %struct.point
%b = alloca %struct.point
@@ -75,7 +75,7 @@ entry:
; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
-define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
%prv_array_const = alloca [2 x i32]
%prv_array = alloca [2 x i32]
@@ -110,7 +110,7 @@ for.end:
; FUNC-LABEL: {{^}}short_array:
; R600: MOVA_INT
-define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%0 = alloca [2 x i16]
%1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
@@ -127,7 +127,7 @@ entry:
; FUNC-LABEL: {{^}}char_array:
; R600: MOVA_INT
-define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%0 = alloca [2 x i8]
%1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
@@ -148,7 +148,7 @@ entry:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * T0.X
-define void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
%0 = alloca [2 x i32]
%1 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 0
@@ -169,7 +169,7 @@ entry:
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-NOT: [[CHAN]]+
-define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
%0 = alloca [3 x i8], align 1
%1 = alloca [2 x i8], align 1
@@ -193,7 +193,7 @@ entry:
ret void
}
-define void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i8]]
%gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
@@ -207,7 +207,7 @@ entry:
ret void
}
-define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i32]]
%gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
@@ -220,7 +220,7 @@ entry:
ret void
}
-define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i64]]
%gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
@@ -235,7 +235,7 @@ entry:
%struct.pair32 = type { i32, i32 }
-define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]]
%gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
@@ -248,7 +248,7 @@ entry:
ret void
}
-define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x %struct.pair32]
%gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
@@ -261,7 +261,7 @@ entry:
ret void
}
-define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
@@ -282,7 +282,7 @@ entry:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
-define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32]
%tmp0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
store i32 5, i32* %tmp0
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; CHECK-LABEL: @array_alloca(
; CHECK: %stack = alloca i32, i32 5, align 4
-define void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+define amdgpu_kernel void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
%stack = alloca i32, i32 5, align 4
%ld0 = load i32, i32 addrspace(1)* %in, align 4
@@ -27,7 +27,7 @@ entry:
; CHECK-LABEL: @array_alloca_dynamic(
; CHECK: %stack = alloca i32, i32 %size, align 4
-define void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 {
+define amdgpu_kernel void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 {
entry:
%stack = alloca i32, i32 %size, align 4
%ld0 = load i32, i32 addrspace(1)* %in, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll Tue Mar 21 16:39:51 2017
@@ -7,14 +7,14 @@ declare void @foo(float*) #0
declare void @foo.varargs(...) #0
; CHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo
-define void @crash_call_constexpr_cast() #0 {
+define amdgpu_kernel void @crash_call_constexpr_cast() #0 {
%alloca = alloca i32
call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0
ret void
}
; CHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs
-define void @crash_call_constexpr_cast_varargs() #0 {
+define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 {
%alloca = alloca i32
call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-globals.ll Tue Mar 21 16:39:51 2017
@@ -5,12 +5,12 @@
@global_array0 = internal unnamed_addr addrspace(3) global [750 x [10 x i32]] undef, align 4
@global_array1 = internal unnamed_addr addrspace(3) global [750 x [10 x i32]] undef, align 4
-; IR-LABEL: define void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+; IR-LABEL: define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
; IR: alloca [10 x i32]
; ASM-LABEL: {{^}}promote_alloca_size_256:
; ASM: ; LDSByteSize: 60000 bytes/workgroup (compile time only)
-define void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [10 x i32], align 4
%tmp = load i32, i32 addrspace(1)* %in, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@ declare i8* @llvm.invariant.group.barrie
; GCN-LABEL: {{^}}use_invariant_promotable_lds:
; GCN: buffer_load_dword
; GCN: ds_write_b32
-define void @use_invariant_promotable_lds(i32 addrspace(1)* %arg) #2 {
+define amdgpu_kernel void @use_invariant_promotable_lds(i32 addrspace(1)* %arg) #2 {
bb:
%tmp = alloca i32, align 4
%tmp1 = bitcast i32* %tmp to i8*
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@ declare void @llvm.lifetime.end(i64, i8*
; OPT-NOT: alloca i32
; OPT-NOT: llvm.lifetime
; OPT: store i32 %tmp3, i32 addrspace(3)*
-define void @use_lifetime_promotable_lds(i32 addrspace(1)* %arg) #2 {
+define amdgpu_kernel void @use_lifetime_promotable_lds(i32 addrspace(1)* %arg) #2 {
bb:
%tmp = alloca i32, align 4
%tmp1 = bitcast i32* %tmp to i8*
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll Tue Mar 21 16:39:51 2017
@@ -14,7 +14,7 @@ declare i32 @llvm.objectsize.i32.p0i8(i8
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
-define void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 4
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
@@ -28,7 +28,7 @@ define void @promote_with_memcpy(i32 add
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
-define void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 4
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
@@ -41,7 +41,7 @@ define void @promote_with_memmove(i32 ad
; CHECK-LABEL: @promote_with_memset(
; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false)
-define void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 4
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
%in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)*
@@ -53,7 +53,7 @@ define void @promote_with_memset(i32 add
; CHECK-LABEL: @promote_with_objectsize(
; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false, i1 false)
-define void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
%alloca = alloca [17 x i32], align 4
%alloca.bc = bitcast [17 x i32]* %alloca to i8*
%size = call i32 @llvm.objectsize.i32.p0i8(i8* %alloca.bc, i1 false, i1 false)
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; NOOPTS: workgroup_group_segment_byte_size = 0{{$}}
; NOOPTS-NOT ds_write
; OPTS: ds_write
-define void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i32]]
%gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
@@ -21,7 +21,7 @@ entry:
; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array:
; ALL: workgroup_group_segment_byte_size = 0{{$}}
; ALL-NOT ds_write
-define void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
+define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
entry:
%alloca = alloca [2 x [2 x i32]]
%gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll Tue Mar 21 16:39:51 2017
@@ -30,7 +30,7 @@
; GCN-LABEL: {{^}}promote_alloca_size_order_0:
; GCN: workgroup_group_segment_byte_size = 2340
-define void @promote_alloca_size_order_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
+define amdgpu_kernel void @promote_alloca_size_order_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
%stack = alloca [5 x i32], align 4
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
@@ -62,7 +62,7 @@ entry:
; GCN-LABEL: {{^}}promote_alloca_size_order_1:
; GCN: workgroup_group_segment_byte_size = 2352
-define void @promote_alloca_size_order_1(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
+define amdgpu_kernel void @promote_alloca_size_order_1(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
%stack = alloca [5 x i32], align 4
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
@@ -100,7 +100,7 @@ entry:
; GCN-LABEL: {{^}}promote_alloca_align_pad_guess_over_limit:
; GCN: workgroup_group_segment_byte_size = 1060
-define void @promote_alloca_align_pad_guess_over_limit(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
+define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 {
entry:
%stack = alloca [5 x i32], align 4
%tmp0 = load i32, i32 addrspace(1)* %in, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}stored_lds_pointer_value:
; GCN: buffer_store_dword v
-define void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 {
%tmp = alloca float
store float 0.0, float *%tmp
store float* %tmp, float* addrspace(1)* %ptr
@@ -14,7 +14,7 @@ define void @stored_lds_pointer_value(fl
; GCN-LABEL: {{^}}stored_lds_pointer_value_offset:
; GCN: buffer_store_dword v
-define void @stored_lds_pointer_value_offset(float* addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value_offset(float* addrspace(1)* %ptr) #0 {
%tmp0 = alloca float
%tmp1 = alloca float
store float 0.0, float *%tmp0
@@ -29,7 +29,7 @@ define void @stored_lds_pointer_value_of
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
-define void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 {
bb:
%tmp = alloca float, i32 16
store float 0.0, float* %tmp
@@ -46,7 +46,7 @@ bb:
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
-define void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) {
+define amdgpu_kernel void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) {
entry:
%tmp0 = alloca [4 x i32]
%x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp0, i32 0, i32 0
@@ -64,7 +64,7 @@ entry:
; GCN-LABEL: {{^}}stored_fi_to_self:
; GCN-NOT: ds_
-define void @stored_fi_to_self() #0 {
+define amdgpu_kernel void @stored_fi_to_self() #0 {
%tmp = alloca i32*
store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp
%bitcast = bitcast i32** %tmp to i32*
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1
-define void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
@@ -22,7 +22,7 @@ define void @lds_promoted_alloca_icmp_sa
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null
-define void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
%cmp = icmp eq i32* %ptr0, null
@@ -35,7 +35,7 @@ define void @lds_promoted_alloca_icmp_nu
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0
-define void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
%cmp = icmp eq i32* null, %ptr0
@@ -49,7 +49,7 @@ define void @lds_promoted_alloca_icmp_nu
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
; CHECK: %ptr1 = call i32* @get_unknown_pointer()
; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1
-define void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
%ptr1 = call i32* @get_unknown_pointer()
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; CHECK: endif:
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
-define void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
entry:
%alloca = alloca [64 x i32], align 4
br i1 undef, label %if, label %else
@@ -34,7 +34,7 @@ endif:
; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
-define void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
entry:
%alloca = alloca [64 x i32], align 4
br i1 undef, label %if, label %endif
@@ -51,7 +51,7 @@ endif:
; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
-define void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
entry:
%alloca = alloca [64 x i32], align 4
br i1 undef, label %if, label %endif
@@ -73,7 +73,7 @@ endif:
; CHECK: br label %exit
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
-define void @one_phi_value(i32 %a) #0 {
+define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
entry:
%alloca = alloca [64 x i32], align 4
%arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
@@ -97,7 +97,7 @@ exit:
; CHECK: endif:
; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32* %phi.ptr, align 4
-define void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
entry:
%alloca = alloca [64 x i32], align 4
br i1 undef, label %if, label %else
@@ -134,7 +134,7 @@ endif:
; CHECK-LABEL: @ptr_induction_var_same_alloca(
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
-define void @ptr_induction_var_same_alloca() #0 {
+define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
entry:
%alloca = alloca [64 x i32], align 4
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
@@ -172,7 +172,7 @@ for.body:
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
-define void @ptr_induction_var_alloca_unknown() #0 {
+define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
entry:
%alloca = alloca [64 x i32], align 4
%arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, i32* undef, i32* %alloca
-define void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
%alloca = alloca i32, align 4
%select = select i1 undef, i32* undef, i32* %alloca
store i32 0, i32* %select, align 4
@@ -16,7 +16,7 @@ define void @lds_promoted_alloca_select_
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
-define void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
@@ -33,7 +33,7 @@ define void @lds_promote_alloca_select_t
; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
-define void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
%alloca0 = alloca i32, i32 16, align 4
%alloca1 = alloca i32, i32 16, align 4
%ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
@@ -50,7 +50,7 @@ define void @lds_promote_alloca_select_t
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
-define void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
+define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
@@ -67,7 +67,7 @@ define void @lds_promote_alloca_select_t
; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
-define void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
%ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
@@ -78,7 +78,7 @@ define void @lds_promoted_alloca_select_
ret void
}
-define void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
entry:
%alloca = alloca [16 x i32], align 4
%ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
@@ -102,7 +102,7 @@ bb2:
; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
-define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
+define amdgpu_kernel void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
%tmp = alloca double, align 8
store double 0.000000e+00, double* %tmp, align 8
@@ -117,7 +117,7 @@ bb:
; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
-define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
+define amdgpu_kernel void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
%tmp = alloca double, align 8
store double 0.000000e+00, double* %tmp, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@ declare void @llvm.stackrestore(i8*) #2
; CHECK-LABEL: @try_promote_unhandled_intrinsic(
; CHECK: alloca
; CHECK: call void @llvm.stackrestore(i8* %tmp1)
-define void @try_promote_unhandled_intrinsic(i32 addrspace(1)* %arg) #2 {
+define amdgpu_kernel void @try_promote_unhandled_intrinsic(i32 addrspace(1)* %arg) #2 {
bb:
%tmp = alloca i32, align 4
%tmp1 = bitcast i32* %tmp to i8*
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-volatile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-volatile.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-volatile.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-volatile.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
; CHECK-LABEL: @volatile_load(
; CHECK: alloca [5 x i32]
; CHECK: load volatile i32, i32*
-define void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
%tmp = load i32, i32 addrspace(1)* %in, align 4
@@ -16,7 +16,7 @@ entry:
; CHECK-LABEL: @volatile_store(
; CHECK: alloca [5 x i32]
; CHECK: store volatile i32 %tmp, i32*
-define void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
%tmp = load i32, i32 addrspace(1)* %in, align 4
@@ -30,7 +30,7 @@ entry:
; CHECK: alloca double
; CHECK: load double
; CHECK: load volatile double
-define void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
+define amdgpu_kernel void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
bb:
%tmp = alloca double, align 8
store double 0.000000e+00, double* %tmp, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll Tue Mar 21 16:39:51 2017
@@ -2,7 +2,7 @@
; Don't crash
; CHECK: MAX_UINT
-define void @test(i64 addrspace(1)* %out) {
+define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
bb:
store i64 2, i64 addrspace(1)* %out
%tmp = load i64, i64 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.alu-limits.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
%struct.foo = type {i32, i32, i32}
-define void @alu_limits(i32 addrspace(1)* %out, %struct.foo* %in, i32 %offset) {
+define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo* %in, i32 %offset) {
entry:
%ptr = getelementptr inbounds %struct.foo, %struct.foo* %in, i32 1, i32 2
%x = load i32, i32 *%ptr, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.bitcast.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.bitcast.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; EG: VTX_READ_128 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
%1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
@@ -21,7 +21,7 @@ entry:
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%load = load float, float addrspace(1)* %in, align 4
%bc = bitcast float %load to <2 x i16>
store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
@@ -33,7 +33,7 @@ define void @f32_to_v2i16(<2 x i16> addr
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%bc = bitcast <2 x i16> %load to float
store float %bc, float addrspace(1)* %out, align 4
@@ -45,7 +45,7 @@ define void @v2i16_to_f32(float addrspac
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%bc = bitcast <4 x i8> %load to i32
store i32 %bc, i32 addrspace(1)* %out, align 4
@@ -57,7 +57,7 @@ define void @v4i8_to_i32(i32 addrspace(1
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %load to <4 x i8>
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
@@ -69,7 +69,7 @@ define void @i32_to_v4i8(<4 x i8> addrsp
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%bc = bitcast <2 x i16> %load to <4 x i8>
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
@@ -85,7 +85,7 @@ define void @v2i16_to_v4i8(<4 x i8> addr
; EG: VTX_READ_16
; EG-DAG: BFE_UINT
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 2
%bc = bitcast <4 x i16> %load to <8 x i8>
%element = extractelement <8 x i8> %bc, i32 5
@@ -98,7 +98,7 @@ define void @v4i16_extract_i8(i8 addrspa
; EG: VTX_READ_64 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
-define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%bc = bitcast <2 x i32> %val to double
store double %bc, double addrspace(1)* %out, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.global_atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.global_atomics.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.global_atomics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.global_atomics.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}atomic_add_i32_offset:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -16,7 +16,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_soffset:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -27,7 +27,7 @@ entry:
; FIXME: looks like the offset is wrong
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
@@ -38,7 +38,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -49,7 +49,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -58,7 +58,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -68,7 +68,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_offset:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -78,7 +78,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -89,7 +89,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -98,7 +98,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -108,7 +108,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -118,7 +118,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -129,7 +129,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -138,7 +138,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -148,7 +148,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_offset:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -158,7 +158,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -169,7 +169,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -178,7 +178,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -188,7 +188,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -198,7 +198,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -209,7 +209,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -218,7 +218,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -228,7 +228,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_offset:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -238,7 +238,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -249,7 +249,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -258,7 +258,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -268,7 +268,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -278,7 +278,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -289,7 +289,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -298,7 +298,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -308,7 +308,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -318,7 +318,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -329,7 +329,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -338,7 +338,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -348,7 +348,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -358,7 +358,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -369,7 +369,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -378,7 +378,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -388,7 +388,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
@@ -398,7 +398,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -409,7 +409,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
%val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
ret void
@@ -418,7 +418,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
@@ -428,7 +428,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
@@ -438,7 +438,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -449,7 +449,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
ret void
@@ -458,7 +458,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
@@ -468,7 +468,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_store_i32_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
@@ -478,7 +478,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_store_i32:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
entry:
store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
ret void
@@ -487,7 +487,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
@@ -498,7 +498,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
@@ -507,7 +507,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_inc_add
; EG: MEM_RAT ATOMIC_INC_UINT
-define void @atomic_inc_add(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_inc_add(i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst
@@ -516,7 +516,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_dec_add
; EG: MEM_RAT ATOMIC_DEC_UINT
-define void @atomic_dec_add(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_dec_add(i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst
@@ -525,7 +525,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_inc_sub
; EG: MEM_RAT ATOMIC_INC_UINT
-define void @atomic_inc_sub(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_inc_sub(i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst
@@ -534,7 +534,7 @@ entry:
; FUNC-LABEL: {{^}}atomic_dec_sub
; EG: MEM_RAT ATOMIC_DEC_UINT
-define void @atomic_dec_sub(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_dec_sub(i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.private-memory.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@ declare i32 @llvm.r600.read.tidig.x() no
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
%1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll Tue Mar 21 16:39:51 2017
@@ -2,7 +2,7 @@
; FUNC-LABEL: {{^}}tgid_x:
; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
-define void @tgid_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_x(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
@@ -11,7 +11,7 @@ entry:
; FUNC-LABEL: {{^}}tgid_y:
; EG: MEM_RAT_CACHELESS STORE_RAW T1.Y
-define void @tgid_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_y(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
@@ -20,7 +20,7 @@ entry:
; FUNC-LABEL: {{^}}tgid_z:
; EG: MEM_RAT_CACHELESS STORE_RAW T1.Z
-define void @tgid_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_z(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -29,7 +29,7 @@ entry:
; FUNC-LABEL: {{^}}tidig_x:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
-define void @tidig_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_x(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
@@ -38,7 +38,7 @@ entry:
; FUNC-LABEL: {{^}}tidig_y:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.Y
-define void @tidig_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_y(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
@@ -47,7 +47,7 @@ entry:
; FUNC-LABEL: {{^}}tidig_z:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.Z
-define void @tidig_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_z(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -57,7 +57,7 @@ entry:
; FUNC-LABEL: {{^}}test_implicit:
; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56
; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56
-define void @test_implicit(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
%gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4
@@ -69,7 +69,7 @@ define void @test_implicit(i32 addrspace
; FUNC-LABEL: {{^}}test_implicit_dyn:
; 36 prepended implicit bytes + 8(out pointer + in) = 44
; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44
-define void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
+define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
%gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in
Modified: llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -21,7 +21,7 @@ define void @rcp_pat_f32(float addrspace
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv float 1.0, %src, !fpmath !0
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -33,7 +33,7 @@ define void @rcp_ulp25_pat_f32(float add
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv fast float 1.0, %src, !fpmath !0
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -45,7 +45,7 @@ define void @rcp_fast_ulp25_pat_f32(floa
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv arcp float 1.0, %src, !fpmath !0
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -57,7 +57,7 @@ define void @rcp_arcp_ulp25_pat_f32(floa
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
+define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
%rcp = fdiv float 1.0, %src, !fpmath !0
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -69,7 +69,7 @@ define void @rcp_global_fast_ulp25_pat_f
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
%src.fabs = call float @llvm.fabs.f32(float %src)
%rcp = fdiv float 1.0, %src.fabs
store float %rcp, float addrspace(1)* %out, align 4
@@ -82,7 +82,7 @@ define void @rcp_fabs_pat_f32(float addr
; GCN: buffer_store_dword [[RCP]]
; EG: RECIP_IEEE
-define void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
%rcp = fdiv float -1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -92,7 +92,7 @@ define void @neg_rcp_pat_f32(float addrs
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
; GCN: buffer_store_dword [[RCP]]
-define void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
%src.fabs = call float @llvm.fabs.f32(float %src)
%src.fabs.fneg = fsub float -0.0, %src.fabs
%rcp = fdiv float 1.0, %src.fabs.fneg
@@ -106,7 +106,7 @@ define void @rcp_fabs_fneg_pat_f32(float
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -|[[SRC]]|
; GCN: buffer_store_dword [[RCP]]
; GCN: buffer_store_dword [[MUL]]
-define void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
%src.fabs = call float @llvm.fabs.f32(float %src)
%src.fabs.fneg = fsub float -0.0, %src.fabs
%rcp = fdiv float 1.0, %src.fabs.fneg
@@ -120,7 +120,7 @@ define void @rcp_fabs_fneg_pat_multi_use
; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
-define void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
%x = load float, float addrspace(1)* undef
%rcp = fdiv arcp float %x, 2.0
store float %rcp, float addrspace(1)* %out, align 4
@@ -130,7 +130,7 @@ define void @div_arcp_2_x_pat_f32(float
; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
-define void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
%x = load float, float addrspace(1)* undef
%rcp = fdiv arcp float %x, 10.0
store float %rcp, float addrspace(1)* %out, align 4
@@ -140,7 +140,7 @@ define void @div_arcp_k_x_pat_f32(float
; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
-define void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
%x = load float, float addrspace(1)* undef
%rcp = fdiv arcp float %x, -10.0
store float %rcp, float addrspace(1)* %out, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
declare i32 @llvm.read_register.i32(metadata) #0
-define void @test_invalid_read_flat_scratch_lo(i32 addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @test_invalid_read_flat_scratch_lo(i32 addrspace(1)* %out) nounwind {
store volatile i32 0, i32 addrspace(3)* undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
declare i32 @llvm.read_register.i32(metadata) #0
-define void @test_invalid_read_exec(i32 addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @test_invalid_read_exec(i32 addrspace(1)* %out) nounwind {
store volatile i32 0, i32 addrspace(3)* undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
declare i64 @llvm.read_register.i64(metadata) #0
-define void @test_invalid_read_m0(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_invalid_read_m0(i64 addrspace(1)* %out) #0 {
%exec = call i64 @llvm.read_register.i64(metadata !0)
store i64 %exec, i64 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/read_register.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/read_register.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/read_register.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/read_register.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i64 @llvm.read_register.i64(meta
; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], [[COPY_M0]]
; CHECK: buffer_store_dword [[COPY]]
-define void @test_read_m0(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_m0(i32 addrspace(1)* %out) #0 {
store volatile i32 0, i32 addrspace(3)* undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, i32 addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @test_read_m0(i32 addrspace(
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @test_read_exec(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_exec(i64 addrspace(1)* %out) #0 {
%exec = call i64 @llvm.read_register.i64(metadata !1)
store i64 %exec, i64 addrspace(1)* %out
ret void
@@ -30,7 +30,7 @@ define void @test_read_exec(i64 addrspac
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
%flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
store i64 %flat_scratch, i64 addrspace(1)* %out
ret void
@@ -39,7 +39,7 @@ define void @test_read_flat_scratch(i64
; CHECK-LABEL: {{^}}test_read_flat_scratch_lo:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_lo
; CHECK: buffer_store_dword [[COPY]]
-define void @test_read_flat_scratch_lo(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_flat_scratch_lo(i32 addrspace(1)* %out) #0 {
%flat_scratch_lo = call i32 @llvm.read_register.i32(metadata !3)
store i32 %flat_scratch_lo, i32 addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @test_read_flat_scratch_lo(i
; CHECK-LABEL: {{^}}test_read_flat_scratch_hi:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_hi
; CHECK: buffer_store_dword [[COPY]]
-define void @test_read_flat_scratch_hi(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_flat_scratch_hi(i32 addrspace(1)* %out) #0 {
%flat_scratch_hi = call i32 @llvm.read_register.i32(metadata !4)
store i32 %flat_scratch_hi, i32 addrspace(1)* %out
ret void
@@ -57,7 +57,7 @@ define void @test_read_flat_scratch_hi(i
; CHECK-LABEL: {{^}}test_read_exec_lo:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_lo
; CHECK: buffer_store_dword [[COPY]]
-define void @test_read_exec_lo(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_exec_lo(i32 addrspace(1)* %out) #0 {
%exec_lo = call i32 @llvm.read_register.i32(metadata !5)
store i32 %exec_lo, i32 addrspace(1)* %out
ret void
@@ -66,7 +66,7 @@ define void @test_read_exec_lo(i32 addrs
; CHECK-LABEL: {{^}}test_read_exec_hi:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_hi
; CHECK: buffer_store_dword [[COPY]]
-define void @test_read_exec_hi(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_read_exec_hi(i32 addrspace(1)* %out) #0 {
%exec_hi = call i32 @llvm.read_register.i32(metadata !6)
store i32 %exec_hi, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/readcyclecounter.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@ declare i64 @llvm.readcyclecounter() #0
; SI: s_memtime s{{\[[0-9]+:[0-9]+\]}}
; VI: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
; GCN: store_dwordx2
-define void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
%cycle0 = call i64 @llvm.readcyclecounter()
store volatile i64 %cycle0, i64 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, [[VAL]]
; GCN: buffer_store_dwordx2
-define void @reduce_i64_load_align_4_width_to_i32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%a = load i64, i64 addrspace(1)* %in, align 4
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
@@ -16,7 +16,7 @@ define void @reduce_i64_load_align_4_wid
; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt0:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: buffer_store_dword [[VAL]]
-define void @reduce_i64_align_4_bitcast_v2i32_elt0(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%a = load i64, i64 addrspace(1)* %in, align 4
%vec = bitcast i64 %a to <2 x i32>
%elt0 = extractelement <2 x i32> %vec, i32 0
@@ -27,7 +27,7 @@ define void @reduce_i64_align_4_bitcast_
; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
; GCN: buffer_store_dword [[VAL]]
-define void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%a = load i64, i64 addrspace(1)* %in, align 4
%vec = bitcast i64 %a to <2 x i32>
%elt0 = extractelement <2 x i32> %vec, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+define amdgpu_kernel void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
ret void
@@ -13,7 +13,7 @@ define void @store_v2i32_as_v4i16_align_
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+define amdgpu_kernel void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
%x.bc = bitcast <4 x i32> %x to <8 x i16>
store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
ret void
@@ -22,7 +22,7 @@ define void @store_v4i32_as_v8i16_align_
; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+define amdgpu_kernel void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
ret void
@@ -32,7 +32,7 @@ define void @store_v2i32_as_i64_align_4(
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
%x.bc = bitcast <4 x i32> %x to <2 x i64>
store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
ret void
@@ -44,7 +44,7 @@ define void @store_v4i32_as_v2i64_align_
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
+define amdgpu_kernel void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
%x.bc = bitcast <4 x i16> %x to <2 x i32>
store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #0
-define void @reg_coalescer_breaks_dead(<2 x i32> addrspace(1)* nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3) #1 {
+define amdgpu_kernel void @reg_coalescer_breaks_dead(<2 x i32> addrspace(1)* nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3) #1 {
bb:
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
%cmp0 = icmp eq i32 %id.x, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
# CHECK: DBG_VALUE{{.*}}debug-use %13.sub2
--- |
- define void @test(i32 addrspace(1)* %out) { ret void }
+ define amdgpu_kernel void @test(i32 addrspace(1)* %out) { ret void }
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4)
!1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6)
Modified: llvm/trunk/test/CodeGen/AMDGPU/register-count-comments.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/register-count-comments.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/register-count-comments.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/register-count-comments.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i
; SI: ; Kernel info:
; SI: ; NumSgprs: {{[0-9]+}}
; SI: ; NumVgprs: {{[0-9]+}}
-define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
+define amdgpu_kernel void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
%aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
@@ -24,7 +24,7 @@ define void @foo(i32 addrspace(1)* noali
; SI-LABEL: {{^}}one_vgpr_used:
; SI: NumVgprs: 1
-define void @one_vgpr_used(i32 addrspace(1)* %out, i32 %x) nounwind {
+define amdgpu_kernel void @one_vgpr_used(i32 addrspace(1)* %out, i32 %x) nounwind {
store i32 %x, i32 addrspace(1)* %out, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/rename-disconnected-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rename-disconnected-bug.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rename-disconnected-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rename-disconnected-bug.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
; definition on every path (there should at least be IMPLICIT_DEF instructions).
target triple = "amdgcn--"
-define void @func() {
+define amdgpu_kernel void @func() {
B0:
br i1 undef, label %B1, label %B2
Modified: llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs.mir Tue Mar 21 16:39:51 2017
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck %s
--- |
- define void @test0() { ret void }
- define void @test1() { ret void }
+ define amdgpu_kernel void @test0() { ret void }
+ define amdgpu_kernel void @test1() { ret void }
...
---
# In the test below we have two independent def+use pairs of subregister1 which
Modified: llvm/trunk/test/CodeGen/AMDGPU/reorder-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reorder-stores.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reorder-stores.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reorder-stores.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
-define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
+define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
%tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
@@ -19,7 +19,7 @@ define void @no_reorder_v2f64_global_loa
; SI: ds_read2_b64
; SI: ds_write2_b64
; SI: s_endpgm
-define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
+define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
%tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
@@ -39,7 +39,7 @@ define void @no_reorder_scalarized_v2f64
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
-define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
+define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
%tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
@@ -54,7 +54,7 @@ define void @no_reorder_split_v8i32_glob
; SI-NOT: ds_read
; SI: ds_write_b64
; SI: s_endpgm
-define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
+define amdgpu_kernel void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
%tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
%tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
Modified: llvm/trunk/test/CodeGen/AMDGPU/rotl.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rotl.i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rotl.i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rotl.i64.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; BOTH-DAG: s_lshr_b64
; BOTH: s_or_b64
; BOTH: s_endpgm
-define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
entry:
%0 = shl i64 %x, %y
%1 = sub i64 64, %y
@@ -26,7 +26,7 @@ entry:
; BOTH: v_or_b32
; BOTH: v_or_b32
; BOTH: s_endpgm
-define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
+define amdgpu_kernel void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
%x = load i64, i64 addrspace(1)* %xptr, align 8
%y = load i64, i64 addrspace(1)* %yptr, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/rotl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rotl.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rotl.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rotl.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; SI: s_sub_i32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
; SI: v_mov_b32_e32 [[VDST:v[0-9]+]], [[SDST]]
; SI: v_alignbit_b32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
-define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+define amdgpu_kernel void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%0 = shl i32 %x, %y
%1 = sub i32 32, %y
@@ -26,7 +26,7 @@ entry:
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI: s_endpgm
-define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+define amdgpu_kernel void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
entry:
%0 = shl <2 x i32> %x, %y
%1 = sub <2 x i32> <i32 32, i32 32>, %y
@@ -46,7 +46,7 @@ entry:
; SI-DAG: s_sub_i32
; SI-DAG: v_alignbit_b32
; SI: s_endpgm
-define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+define amdgpu_kernel void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
entry:
%0 = shl <4 x i32> %x, %y
%1 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
Modified: llvm/trunk/test/CodeGen/AMDGPU/rotr.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rotr.i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rotr.i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rotr.i64.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; BOTH-DAG: s_lshr_b64
; BOTH-DAG: s_lshl_b64
; BOTH: s_or_b64
-define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
entry:
%tmp0 = sub i64 64, %y
%tmp1 = shl i64 %x, %tmp0
@@ -24,7 +24,7 @@ entry:
; VI-DAG: v_lshlrev_b64
; BOTH: v_or_b32
; BOTH: v_or_b32
-define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
+define amdgpu_kernel void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
%x = load i64, i64 addrspace(1)* %xptr, align 8
%y = load i64, i64 addrspace(1)* %yptr, align 8
@@ -37,7 +37,7 @@ entry:
}
; BOTH-LABEL: {{^}}s_rotr_v2i64:
-define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
+define amdgpu_kernel void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
entry:
%tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
%tmp1 = shl <2 x i64> %x, %tmp0
@@ -48,7 +48,7 @@ entry:
}
; BOTH-LABEL: {{^}}v_rotr_v2i64:
-define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
+define amdgpu_kernel void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
entry:
%x = load <2 x i64>, <2 x i64> addrspace(1)* %xptr, align 8
%y = load <2 x i64>, <2 x i64> addrspace(1)* %yptr, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/rotr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rotr.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rotr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rotr.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; R600: BIT_ALIGN_INT
; SI: v_alignbit_b32
-define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+define amdgpu_kernel void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%tmp0 = sub i32 32, %y
%tmp1 = shl i32 %x, %tmp0
@@ -22,7 +22,7 @@ entry:
; SI: v_alignbit_b32
; SI: v_alignbit_b32
-define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+define amdgpu_kernel void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
entry:
%tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
%tmp1 = shl <2 x i32> %x, %tmp0
@@ -42,7 +42,7 @@ entry:
; SI: v_alignbit_b32
; SI: v_alignbit_b32
; SI: v_alignbit_b32
-define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+define amdgpu_kernel void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
entry:
%tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
%tmp1 = shl <4 x i32> %x, %tmp0
Modified: llvm/trunk/test/CodeGen/AMDGPU/rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rsq.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rsq.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/rsq.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@ declare double @llvm.sqrt.f64(double) no
; SI-LABEL: {{^}}rsq_f32:
; SI: v_rsq_f32_e32
; SI: s_endpgm
-define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float, float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt
@@ -20,7 +20,7 @@ define void @rsq_f32(float addrspace(1)*
; SI-UNSAFE: v_rsq_f64_e32
; SI-SAFE: v_sqrt_f64_e32
; SI: s_endpgm
-define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double, double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
%div = fdiv double 1.0, %sqrt
@@ -31,7 +31,7 @@ define void @rsq_f64(double addrspace(1)
; SI-LABEL: {{^}}rsq_f32_sgpr:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; SI: s_endpgm
-define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt
store float %div, float addrspace(1)* %out, align 4
@@ -55,7 +55,7 @@ define void @rsq_f32_sgpr(float addrspac
; SI-SAFE-NOT: v_rsq_f32
; SI: s_endpgm
-define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
+define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -81,7 +81,7 @@ define void @rsqrt_fmul(float addrspace(
; SI-UNSAFE: v_rsq_f32_e32 [[RSQ:v[0-9]+]], v{{[0-9]+}}
; SI-UNSAFE: v_xor_b32_e32 [[NEG_RSQ:v[0-9]+]], 0x80000000, [[RSQ]]
; SI-UNSAFE: buffer_store_dword [[NEG_RSQ]]
-define void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float, float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val)
%div = fdiv float -1.0, %sqrt
@@ -96,7 +96,7 @@ define void @neg_rsq_f32(float addrspace
; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}
; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double, double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val)
%div = fdiv double -1.0, %sqrt
@@ -112,7 +112,7 @@ define void @neg_rsq_f64(double addrspac
; SI-UNSAFE: v_rsq_f32_e64 [[RSQ:v[0-9]+]], -v{{[0-9]+}}
; SI-UNSAFE: v_xor_b32_e32 [[NEG_RSQ:v[0-9]+]], 0x80000000, [[RSQ]]
; SI-UNSAFE: buffer_store_dword [[NEG_RSQ]]
-define void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float, float addrspace(1)* %in, align 4
%val.fneg = fsub float -0.0, %val
%sqrt = call float @llvm.sqrt.f32(float %val.fneg)
@@ -128,7 +128,7 @@ define void @neg_rsq_neg_f32(float addrs
; SI-UNSAFE: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -v{{\[[0-9]+:[0-9]+\]}}
; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
-define void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double, double addrspace(1)* %in, align 4
%val.fneg = fsub double -0.0, %val
%sqrt = call double @llvm.sqrt.f64(double %val.fneg)
Modified: llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VRESULT]]
; SI: s_endpgm
-define void @s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
%add = add i32 %b, 65
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -19,7 +19,7 @@ define void @s_addk_i32_k0(i32 addrspace
; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]]
; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]]
; SI: s_endpgm
-define void @s_addk_i32_k0_x2(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_addk_i32_k0_x2(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %a, i32 %b) {
%add0 = add i32 %a, 65
%add1 = add i32 %b, 65
store i32 %add0, i32 addrspace(1)* %out0
@@ -30,7 +30,7 @@ define void @s_addk_i32_k0_x2(i32 addrsp
; SI-LABEL: {{^}}s_addk_i32_k1:
; SI: s_addk_i32 {{s[0-9]+}}, 0x7fff{{$}}
; SI: s_endpgm
-define void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
%add = add i32 %b, 32767 ; (1 << 15) - 1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -39,7 +39,7 @@ define void @s_addk_i32_k1(i32 addrspace
; SI-LABEL: {{^}}s_addk_i32_k2:
; SI: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, 17
; SI: s_endpgm
-define void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
%add = add i32 %b, -17
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @s_addk_i32_k2(i32 addrspace
; SI-LABEL: {{^}}s_addk_i32_k3:
; SI: s_addk_i32 {{s[0-9]+}}, 0xffbf{{$}}
; SI: s_endpgm
-define void @s_addk_i32_k3(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_addk_i32_k3(i32 addrspace(1)* %out, i32 %b) {
%add = add i32 %b, -65
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -58,7 +58,7 @@ define void @s_addk_i32_k3(i32 addrspace
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
; SI: s_endpgm
-define void @s_addk_v2i32_k0(<2 x i32> addrspace(1)* %out, <2 x i32> %b) {
+define amdgpu_kernel void @s_addk_v2i32_k0(<2 x i32> addrspace(1)* %out, <2 x i32> %b) {
%add = add <2 x i32> %b, <i32 65, i32 66>
store <2 x i32> %add, <2 x i32> addrspace(1)* %out
ret void
@@ -70,7 +70,7 @@ define void @s_addk_v2i32_k0(<2 x i32> a
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44
; SI: s_endpgm
-define void @s_addk_v4i32_k0(<4 x i32> addrspace(1)* %out, <4 x i32> %b) {
+define amdgpu_kernel void @s_addk_v4i32_k0(<4 x i32> addrspace(1)* %out, <4 x i32> %b) {
%add = add <4 x i32> %b, <i32 65, i32 66, i32 67, i32 68>
store <4 x i32> %add, <4 x i32> addrspace(1)* %out
ret void
@@ -86,7 +86,7 @@ define void @s_addk_v4i32_k0(<4 x i32> a
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x47
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x48
; SI: s_endpgm
-define void @s_addk_v8i32_k0(<8 x i32> addrspace(1)* %out, <8 x i32> %b) {
+define amdgpu_kernel void @s_addk_v8i32_k0(<8 x i32> addrspace(1)* %out, <8 x i32> %b) {
%add = add <8 x i32> %b, <i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72>
store <8 x i32> %add, <8 x i32> addrspace(1)* %out
ret void
@@ -95,7 +95,7 @@ define void @s_addk_v8i32_k0(<8 x i32> a
; SI-LABEL: {{^}}no_s_addk_i32_k0:
; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8000{{$}}
; SI: s_endpgm
-define void @no_s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @no_s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
%add = add i32 %b, 32768 ; 1 << 15
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -105,7 +105,7 @@ define void @no_s_addk_i32_k0(i32 addrsp
; SI-LABEL: {{^}}commute_s_addk_i32:
; SI: s_addk_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_s_addk_i32(i32 addrspace(1)* %out, i32 %b) #0 {
+define amdgpu_kernel void @commute_s_addk_i32(i32 addrspace(1)* %out, i32 %b) #0 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%add = add i32 %size, %b
call void asm sideeffect "; foo $0, $1", "v,s"([512 x i32] addrspace(3)* @lds, i32 %add)
Modified: llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -21,7 +21,7 @@ define void @s_movk_i32_k0(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -35,7 +35,7 @@ define void @s_movk_i32_k1(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -49,7 +49,7 @@ define void @s_movk_i32_k2(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -63,7 +63,7 @@ define void @s_movk_i32_k3(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -78,7 +78,7 @@ define void @s_movk_i32_k4(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
store i64 %or, i64 addrspace(1)* %out
@@ -92,7 +92,7 @@ define void @s_movk_i32_k5(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -107,7 +107,7 @@ define void @s_movk_i32_k6(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
store i64 %or, i64 addrspace(1)* %out
@@ -122,7 +122,7 @@ define void @s_movk_i32_k7(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
store i64 %or, i64 addrspace(1)* %out
@@ -137,7 +137,7 @@ define void @s_movk_i32_k8(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
store i64 %or, i64 addrspace(1)* %out
@@ -152,7 +152,7 @@ define void @s_movk_i32_k9(i64 addrspace
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
store i64 %or, i64 addrspace(1)* %out
@@ -167,7 +167,7 @@ define void @s_movk_i32_k10(i64 addrspac
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
store i64 %or, i64 addrspace(1)* %out
@@ -182,7 +182,7 @@ define void @s_movk_i32_k11(i64 addrspac
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
-define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+define amdgpu_kernel void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
store i64 %or, i64 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VRESULT]]
; SI: s_endpgm
-define void @s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
%mul = mul i32 %b, 65
store i32 %mul, i32 addrspace(1)* %out
ret void
@@ -16,7 +16,7 @@ define void @s_mulk_i32_k0(i32 addrspace
; SI-LABEL: {{^}}s_mulk_i32_k1:
; SI: s_mulk_i32 {{s[0-9]+}}, 0x7fff{{$}}
; SI: s_endpgm
-define void @s_mulk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_mulk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
%mul = mul i32 %b, 32767 ; (1 << 15) - 1
store i32 %mul, i32 addrspace(1)* %out
ret void
@@ -25,7 +25,7 @@ define void @s_mulk_i32_k1(i32 addrspace
; SI-LABEL: {{^}}s_mulk_i32_k2:
; SI: s_mulk_i32 {{s[0-9]+}}, 0xffef{{$}}
; SI: s_endpgm
-define void @s_mulk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @s_mulk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
%mul = mul i32 %b, -17
store i32 %mul, i32 addrspace(1)* %out
ret void
@@ -34,7 +34,7 @@ define void @s_mulk_i32_k2(i32 addrspace
; SI-LABEL: {{^}}no_s_mulk_i32_k0:
; SI: s_mul_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8001{{$}}
; SI: s_endpgm
-define void @no_s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+define amdgpu_kernel void @no_s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
%mul = mul i32 %b, 32769 ; 1 << 15 + 1
store i32 %mul, i32 addrspace(1)* %out
ret void
@@ -44,7 +44,7 @@ define void @no_s_mulk_i32_k0(i32 addrsp
; SI-LABEL: {{^}}commute_s_mulk_i32:
; SI: s_mulk_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_s_mulk_i32(i32 addrspace(1)* %out, i32 %b) #0 {
+define amdgpu_kernel void @commute_s_mulk_i32(i32 addrspace(1)* %out, i32 %b) #0 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%add = mul i32 %size, %b
call void asm sideeffect "; foo $0, $1", "v,s"([512 x i32] addrspace(3)* @lds, i32 %add)
Modified: llvm/trunk/test/CodeGen/AMDGPU/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sad.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sad.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sad.ll Tue Mar 21 16:39:51 2017
@@ -2,7 +2,7 @@
; GCN-LABEL: {{^}}v_sad_u32_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
@@ -18,7 +18,7 @@ define void @v_sad_u32_pat1(i32 addrspac
; GCN-LABEL: {{^}}v_sad_u32_constant_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20
-define void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {
%icmp0 = icmp ugt i32 %a, 90
%t0 = select i1 %icmp0, i32 %a, i32 90
@@ -34,7 +34,7 @@ define void @v_sad_u32_constant_pat1(i32
; GCN-LABEL: {{^}}v_sad_u32_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%sub0 = sub i32 %a, %b
%sub1 = sub i32 %b, %a
@@ -51,7 +51,7 @@ define void @v_sad_u32_pat2(i32 addrspac
; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
@@ -68,7 +68,7 @@ define void @v_sad_u32_multi_use_sub_pat
; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
@@ -84,7 +84,7 @@ define void @v_sad_u32_multi_use_add_pat
; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
store volatile i32 %t0, i32 *undef
@@ -101,7 +101,7 @@ define void @v_sad_u32_multi_use_max_pat
; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
@@ -119,7 +119,7 @@ define void @v_sad_u32_multi_use_min_pat
; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%sub0 = sub i32 %a, %b
store volatile i32 %sub0, i32 *undef
@@ -136,7 +136,7 @@ define void @v_sad_u32_multi_use_sub_pat
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
%icmp0 = icmp ugt i32 %a, %b
%sub0 = sub i32 %a, %b
%sub1 = sub i32 %b, %a
@@ -154,7 +154,7 @@ define void @v_sad_u32_multi_use_select_
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define amdgpu_kernel void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
%icmp0 = icmp ugt <4 x i32> %a, %b
%t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
@@ -173,7 +173,7 @@ define void @v_sad_u32_vector_pat1(<4 x
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define amdgpu_kernel void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
%icmp0 = icmp ugt <4 x i32> %a, %b
%sub0 = sub <4 x i32> %a, %b
%sub1 = sub <4 x i32> %b, %a
@@ -187,7 +187,7 @@ define void @v_sad_u32_vector_pat2(<4 x
; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+define amdgpu_kernel void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
%icmp0 = icmp ugt i16 %a, %b
%t0 = select i1 %icmp0, i16 %a, i16 %b
@@ -204,7 +204,7 @@ define void @v_sad_u32_i16_pat1(i16 addr
; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b, i16 zeroext %c) {
+define amdgpu_kernel void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b, i16 zeroext %c) {
%icmp0 = icmp ugt i16 %a, %b
%sub0 = sub i16 %a, %b
%sub1 = sub i16 %b, %a
@@ -218,7 +218,7 @@ define void @v_sad_u32_i16_pat2(i16 addr
; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+define amdgpu_kernel void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
%icmp0 = icmp ugt i8 %a, %b
%t0 = select i1 %icmp0, i8 %a, i8 %b
@@ -234,7 +234,7 @@ define void @v_sad_u32_i8_pat1(i8 addrsp
; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:
; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
+define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
%icmp0 = icmp ugt i8 %a, %b
%sub0 = sub i8 %a, %b
%sub1 = sub i8 %b, %a
@@ -251,7 +251,7 @@ define void @v_sad_u32_i8_pat2(i8 addrsp
; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
%icmp0 = icmp ugt i32 %a, %b
%t0 = select i1 %icmp0, i32 %a, i32 %b
@@ -269,7 +269,7 @@ define void @v_sad_u32_mismatched_operan
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
%icmp0 = icmp ugt i32 %a, %b
%sub0 = sub i32 %a, %d
%sub1 = sub i32 %b, %a
Modified: llvm/trunk/test/CodeGen/AMDGPU/saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/saddo.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/saddo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/saddo.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@ declare { i32, i1 } @llvm.sadd.with.over
declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
; FUNC-LABEL: {{^}}saddo_i64_zext:
-define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %sadd, 0
%carry = extractvalue { i64, i1 } %sadd, 1
@@ -17,7 +17,7 @@ define void @saddo_i64_zext(i64 addrspac
}
; FUNC-LABEL: {{^}}s_saddo_i32:
-define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %sadd, 0
%carry = extractvalue { i32, i1 } %sadd, 1
@@ -27,7 +27,7 @@ define void @s_saddo_i32(i32 addrspace(1
}
; FUNC-LABEL: {{^}}v_saddo_i32:
-define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -39,7 +39,7 @@ define void @v_saddo_i32(i32 addrspace(1
}
; FUNC-LABEL: {{^}}s_saddo_i64:
-define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %sadd, 0
%carry = extractvalue { i64, i1 } %sadd, 1
@@ -51,7 +51,7 @@ define void @s_saddo_i64(i64 addrspace(1
; FUNC-LABEL: {{^}}v_saddo_i64:
; SI: v_add_i32
; SI: v_addc_u32
-define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64, i64 addrspace(1)* %aptr, align 4
%b = load i64, i64 addrspace(1)* %bptr, align 4
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
Modified: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll Tue Mar 21 16:39:51 2017
@@ -24,7 +24,7 @@ declare i32 @llvm.amdgcn.workitem.id.y()
; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}
; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}
-define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -65,7 +65,7 @@ done:
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
; GCN-NOHSA: buffer_store_dword [[V_OUT]]
; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
-define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
entry:
%tmp = icmp ne i32 %a, 0
br i1 %tmp, label %if, label %else
@@ -93,7 +93,7 @@ endif:
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -113,7 +113,7 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
-define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
@@ -133,7 +133,7 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx2
; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
@@ -155,7 +155,7 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
@@ -189,7 +189,7 @@ entry:
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
@@ -230,7 +230,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN: s_endpgm
-define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
@@ -247,7 +247,7 @@ entry:
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
; GCN-NOHSA: buffer_store_dword [[ADD]]
; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
-define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
+define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -261,7 +261,7 @@ entry:
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}}
-define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -275,7 +275,7 @@ entry:
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -290,7 +290,7 @@ entry:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -313,7 +313,7 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -350,7 +350,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -385,7 +385,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -441,7 +441,7 @@ entry:
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]]
; GCN: {{^}}[[EXIT]]:
; GCN: s_endpgm
-define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
bb3: ; preds = %bb2
%tmp0 = bitcast i32 %cond to float
%tmp1 = fadd float %tmp0, 2.500000e-01
@@ -459,7 +459,7 @@ bb7:
; GCN-LABEL: {{^}}phi_visit_order:
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}}
-define void @phi_visit_order() {
+define amdgpu_kernel void @phi_visit_order() {
bb:
br label %bb1
@@ -484,7 +484,7 @@ bb4:
; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]:
; GCN: s_xor_b32 [[B]], [[B]], [[A]]
; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]]
-define void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
+define amdgpu_kernel void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
entry:
br label %loop
Modified: llvm/trunk/test/CodeGen/AMDGPU/sampler-resource-id.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sampler-resource-id.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sampler-resource-id.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sampler-resource-id.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 0(
-define void @test_0(i32 %in0, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_0(i32 %in0, i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
store i32 %0, i32 addrspace(1)* %out
@@ -17,7 +17,7 @@ entry:
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 1(
-define void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
store i32 %0, i32 addrspace(1)* %out
@@ -29,7 +29,7 @@ entry:
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 2(
-define void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
store i32 %0, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir Tue Mar 21 16:39:51 2017
@@ -1,23 +1,23 @@
# RUN: llc -march=amdgcn -run-pass si-insert-waits %s -o - | FileCheck %s
--- |
- define void @basic_insert_dcache_wb() {
+ define amdgpu_kernel void @basic_insert_dcache_wb() {
ret void
}
- define void @explicit_flush_after() {
+ define amdgpu_kernel void @explicit_flush_after() {
ret void
}
- define void @explicit_flush_before() {
+ define amdgpu_kernel void @explicit_flush_before() {
ret void
}
- define void @no_scalar_store() {
+ define amdgpu_kernel void @no_scalar_store() {
ret void
}
- define void @multi_block_store() {
+ define amdgpu_kernel void @multi_block_store() {
bb0:
br i1 undef, label %bb1, label %bb2
@@ -28,7 +28,7 @@
ret void
}
- define void @one_block_store() {
+ define amdgpu_kernel void @one_block_store() {
bb0:
br i1 undef, label %bb1, label %bb2
Modified: llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scalar_to_vector.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN: v_or_b32_e32 v[[OR:[0-9]+]], [[SHL]], [[SHR]]
; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[OR]]
; GCN: buffer_store_dwordx2 v{{\[}}[[OR]]:[[COPY]]{{\]}}
-define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tmp1 = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -21,7 +21,7 @@ define void @scalar_to_vector_v2i32(<4 x
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
; GCN: buffer_store_dwordx2
-define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tmp1 = load float, float addrspace(1)* %in, align 4
%bc = bitcast float %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -33,7 +33,7 @@ define void @scalar_to_vector_v2f32(<4 x
; to produce one, but for some reason never made it to selection.
-; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+; define amdgpu_kernel void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
; %tmp1 = load i32, i32 addrspace(1)* %in, align 4
; %bc = bitcast i32 %tmp1 to <4 x i8>
@@ -42,7 +42,7 @@ define void @scalar_to_vector_v2f32(<4 x
; ret void
; }
-; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
+; define amdgpu_kernel void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
; %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
; %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
; %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
@@ -51,7 +51,7 @@ define void @scalar_to_vector_v2f32(<4 x
; ret void
; }
-; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
+; define amdgpu_kernel void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
; %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
; %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
; %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
@@ -59,7 +59,7 @@ define void @scalar_to_vector_v2f32(<4 x
; ret void
; }
-; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
+; define amdgpu_kernel void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
@@ -67,7 +67,7 @@ define void @scalar_to_vector_v2f32(<4 x
; ret void
; }
-define void @scalar_to_vector_test6(<2 x half> addrspace(1)* %out, i8 zeroext %val) nounwind {
+define amdgpu_kernel void @scalar_to_vector_test6(<2 x half> addrspace(1)* %out, i8 zeroext %val) nounwind {
%newvec0 = insertelement <4 x i8> undef, i8 %val, i32 0
%bc = bitcast <4 x i8> %newvec0 to <2 x half>
store <2 x half> %bc, <2 x half> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll Tue Mar 21 16:39:51 2017
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s
; REQUIRES: asserts
-define void @main() #0 {
+define amdgpu_kernel void @main() #0 {
main_body:
%tmp = load <4 x float>, <4 x float> addrspace(9)* null
%tmp5 = extractelement <4 x float> %tmp, i32 3
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-global-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-global-loads.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-global-loads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-global-loads.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; SI: buffer_store_dword [[REG0]]
; SI: buffer_store_dword [[REG1]]
-define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
%load0 = load i32, i32 addrspace(1)* %ptr, align 4
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 2
%load1 = load i32, i32 addrspace(1)* %gep, align 4
@@ -24,7 +24,7 @@ define void @cluster_global_arg_loads(i3
; FUNC-LABEL: {{^}}same_base_ptr_crash:
; SI: buffer_load_dword
; SI: buffer_load_dword
-define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
+define amdgpu_kernel void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
%out1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
%tmp0 = load i32, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-if-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-if-2.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-if-2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-if-2.ll Tue Mar 21 16:39:51 2017
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
;REQUIRES: asserts
-define void @main() {
+define amdgpu_kernel void @main() {
main_body:
%0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%1 = extractelement <4 x float> %0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-if.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-if.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-if.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-if.ll Tue Mar 21 16:39:51 2017
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
;REQUIRES: asserts
-define void @main() {
+define amdgpu_kernel void @main() {
main_body:
%0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
-define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
+define amdgpu_kernel void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
store i32 %x, i32 addrspace(1)* %out0, align 4
store i32 %y, i32 addrspace(1)* %out1, align 4
ret void
@@ -26,7 +26,7 @@ define void @cluster_arg_loads(i32 addrs
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: s_endpgm
-define void @same_base_ptr_crash(i64 addrspace(1)* %out,
+define amdgpu_kernel void @same_base_ptr_crash(i64 addrspace(1)* %out,
i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7,
i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15,
i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23,
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; We expect a two digit VGPR usage here, not a three digit.
; CHECK: NumVgprs: {{[0-9][0-9]$}}
-define void @load_fma_store(float addrspace(3)* nocapture readonly %arg, float addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @load_fma_store(float addrspace(3)* nocapture readonly %arg, float addrspace(1)* nocapture %arg1) {
bb:
%tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1
%tmp2 = load float, float addrspace(3)* %tmp, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; VI: NumSgprs: {{[1-5][0-9]$}}
; VI: NumVgprs: {{[1-3][0-9]$}}
-define void @load_fma_store(float addrspace(3)* nocapture readonly %in_arg, float addrspace(1)* nocapture %out_arg) {
+define amdgpu_kernel void @load_fma_store(float addrspace(3)* nocapture readonly %in_arg, float addrspace(1)* nocapture %out_arg) {
bb:
%adr.a.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20004
%adr.b.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20252
Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8004
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
-define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
+define amdgpu_kernel void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
@@ -53,7 +53,7 @@ done:
; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
-define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
+define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
@@ -88,7 +88,7 @@ done:
; GCN-LABEL: {{^}}neg_vaddr_offset:
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16{{$}}
-define void @neg_vaddr_offset(i32 %offset) {
+define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) {
entry:
%array = alloca [8192 x i32]
%ptr_offset = add i32 %offset, 4
@@ -99,7 +99,7 @@ entry:
; GCN-LABEL: {{^}}pos_vaddr_offset:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:20
-define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
+define amdgpu_kernel void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
entry:
%array = alloca [8192 x i32]
%ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdiv.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdiv.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; FUNC-LABEL: {{^}}sdiv_i32:
; EG: CF_END
-define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in
%den = load i32, i32 addrspace(1) * %den_ptr
@@ -23,7 +23,7 @@ define void @sdiv_i32(i32 addrspace(1)*
}
; FUNC-LABEL: {{^}}sdiv_i32_4:
-define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32, i32 addrspace(1) * %in
%result = sdiv i32 %num, 4
store i32 %result, i32 addrspace(1)* %out
@@ -43,14 +43,14 @@ define void @sdiv_i32_4(i32 addrspace(1)
; SI: v_add_i32
; SI: buffer_store_dword
; SI: s_endpgm
-define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32, i32 addrspace(1) * %in
%result = sdiv i32 %num, 3435
store i32 %result, i32 addrspace(1)* %out
ret void
}
-define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
@@ -59,14 +59,14 @@ define void @sdiv_v2i32(<2 x i32> addrsp
ret void
}
-define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%result = sdiv <2 x i32> %num, <i32 4, i32 4>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
@@ -75,7 +75,7 @@ define void @sdiv_v4i32(<4 x i32> addrsp
ret void
}
-define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -86,7 +86,7 @@ define void @sdiv_v4i32_4(<4 x i32> addr
; SI: v_rcp_f32
; SI: v_bfe_i32 [[BFE:v[0-9]+]], v{{[0-9]+}}, 0, 8
; SI: buffer_store_dword [[BFE]]
-define void @v_sdiv_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
%den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
%num = load i8, i8 addrspace(1) * %in
%den = load i8, i8 addrspace(1) * %den_ptr
@@ -100,7 +100,7 @@ define void @v_sdiv_i8(i32 addrspace(1)*
; SI: v_rcp_f32
; SI: v_bfe_i32 [[BFE:v[0-9]+]], v{{[0-9]+}}, 0, 23
; SI: buffer_store_dword [[BFE]]
-define void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) {
%den_ptr = getelementptr i23, i23 addrspace(1)* %in, i23 1
%num = load i23, i23 addrspace(1) * %in
%den = load i23, i23 addrspace(1) * %den_ptr
@@ -114,7 +114,7 @@ define void @v_sdiv_i23(i32 addrspace(1)
; SI: v_rcp_f32
; SI: v_bfe_i32 [[BFE:v[0-9]+]], v{{[0-9]+}}, 0, 24
; SI: buffer_store_dword [[BFE]]
-define void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) {
%den_ptr = getelementptr i24, i24 addrspace(1)* %in, i24 1
%num = load i24, i24 addrspace(1) * %in
%den = load i24, i24 addrspace(1) * %den_ptr
@@ -126,7 +126,7 @@ define void @v_sdiv_i24(i32 addrspace(1)
; FUNC-LABEL: {{^}}v_sdiv_i25:
; SI-NOT: v_rcp_f32
-define void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) {
%den_ptr = getelementptr i25, i25 addrspace(1)* %in, i25 1
%num = load i25, i25 addrspace(1) * %in
%den = load i25, i25 addrspace(1) * %den_ptr
@@ -137,19 +137,19 @@ define void @v_sdiv_i25(i32 addrspace(1)
}
; Tests for 64-bit divide bypass.
-; define void @test_get_quotient(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+; define amdgpu_kernel void @test_get_quotient(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; %result = sdiv i64 %a, %b
; store i64 %result, i64 addrspace(1)* %out, align 8
; ret void
; }
-; define void @test_get_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+; define amdgpu_kernel void @test_get_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; %result = srem i64 %a, %b
; store i64 %result, i64 addrspace(1)* %out, align 8
; ret void
; }
-; define void @test_get_quotient_and_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+; define amdgpu_kernel void @test_get_quotient_and_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; %resultdiv = sdiv i64 %a, %b
; %resultrem = srem i64 %a, %b
; %result = add i64 %resultdiv, %resultrem
@@ -163,7 +163,7 @@ define void @v_sdiv_i25(i32 addrspace(1)
; SI: v_mul_hi_i32
; SI: v_mul_hi_i32
-define void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) {
+define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) {
%1 = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%2 = sdiv <4 x i32> %1, <i32 53668, i32 53668, i32 53668, i32 53668>
store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdivrem24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdivrem24.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdivrem24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdivrem24.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
-define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
%den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
%num = load i8, i8 addrspace(1) * %in
%den = load i8, i8 addrspace(1) * %den_ptr
@@ -31,7 +31,7 @@ define void @sdiv24_i8(i8 addrspace(1)*
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
-define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
%den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
%num = load i16, i16 addrspace(1) * %in, align 2
%den = load i16, i16 addrspace(1) * %den_ptr, align 2
@@ -50,7 +50,7 @@ define void @sdiv24_i16(i16 addrspace(1)
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
-define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -69,7 +69,7 @@ define void @sdiv24_i32(i32 addrspace(1)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -88,7 +88,7 @@ define void @sdiv25_i32(i32 addrspace(1)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -107,7 +107,7 @@ define void @test_no_sdiv24_i32_1(i32 ad
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -130,7 +130,7 @@ define void @test_no_sdiv24_i32_2(i32 ad
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
-define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
%den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
%num = load i8, i8 addrspace(1) * %in
%den = load i8, i8 addrspace(1) * %den_ptr
@@ -149,7 +149,7 @@ define void @srem24_i8(i8 addrspace(1)*
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
-define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
%den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
%num = load i16, i16 addrspace(1) * %in, align 2
%den = load i16, i16 addrspace(1) * %den_ptr, align 2
@@ -168,7 +168,7 @@ define void @srem24_i16(i16 addrspace(1)
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
-define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -187,7 +187,7 @@ define void @srem24_i32(i32 addrspace(1)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @no_srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @no_srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -206,7 +206,7 @@ define void @no_srem25_i32(i32 addrspace
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @no_sdiv25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @no_sdiv25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -225,7 +225,7 @@ define void @no_sdiv25_i24_i25_i32(i32 a
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @no_sdiv25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @no_sdiv25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -244,7 +244,7 @@ define void @no_sdiv25_i25_i24_i32(i32 a
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @no_srem25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @no_srem25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -263,7 +263,7 @@ define void @no_srem25_i24_i25_i32(i32 a
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
-define void @no_srem25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @no_srem25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -283,7 +283,7 @@ define void @no_srem25_i25_i24_i32(i32 a
; EG: INT_TO_FLT
; EG: RECIP_IEEE
-define void @srem25_i24_i11_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem25_i24_i11_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -303,7 +303,7 @@ define void @srem25_i24_i11_i32(i32 addr
; EG: INT_TO_FLT
; EG: RECIP_IEEE
-define void @srem25_i11_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem25_i11_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
@@ -323,7 +323,7 @@ define void @srem25_i11_i24_i32(i32 addr
; EG: INT_TO_FLT
; EG: RECIP_IEEE
-define void @srem25_i17_i12_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem25_i17_i12_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in, align 4
%den = load i32, i32 addrspace(1) * %den_ptr, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdivrem64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdivrem64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdivrem64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdivrem64.ll Tue Mar 21 16:39:51 2017
@@ -70,7 +70,7 @@
; SI-NOT: v_lshr_b64
; VI-NOT: v_lshrrev_b64
; GCN: s_endpgm
-define void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = sdiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -144,7 +144,7 @@ define void @s_test_sdiv(i64 addrspace(1
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
-define void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -159,7 +159,7 @@ define void @s_test_srem(i64 addrspace(1
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
-define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = sdiv i64 %1, %2
@@ -176,7 +176,7 @@ define void @test_sdiv3264(i64 addrspace
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
-define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = srem i64 %1, %2
@@ -196,7 +196,7 @@ define void @test_srem3264(i64 addrspace
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
-define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = sdiv i64 %1, %2
@@ -216,7 +216,7 @@ define void @test_sdiv2464(i64 addrspace
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
-define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = srem i64 %1, %2
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; SDWA: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-define void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%a = load i32, i32 addrspace(1)* %in, align 4
%shr = lshr i32 %a, 16
%add = add i32 %a, %shr
@@ -23,7 +23,7 @@ define void @add_shr_i32(i32 addrspace(1
; SDWA: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-define void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%a = load i32, i32 addrspace(1)* %in, align 4
%shr = lshr i32 %a, 16
%sub = sub i32 %shr, %a
@@ -39,7 +39,7 @@ define void @sub_shr_i32(i32 addrspace(1
; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-define void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) {
+define amdgpu_kernel void @mul_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in1, i32 addrspace(1)* %in2) {
%a = load i32, i32 addrspace(1)* %in1, align 4
%b = load i32, i32 addrspace(1)* %in2, align 4
%shra = lshr i32 %a, 16
@@ -55,7 +55,7 @@ define void @mul_shr_i32(i32 addrspace(1
; SDWA: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_u32_u24_sdwa
-define void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) {
entry:
%a = load i16, i16 addrspace(1)* %ina, align 4
%b = load i16, i16 addrspace(1)* %inb, align 4
@@ -75,7 +75,7 @@ entry:
; SDWA: v_mul_u32_u24_sdwa v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL]], v{{[0-9]+}}
-define void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
%b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -97,7 +97,7 @@ entry:
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL1]], v{{[0-9]+}}
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL0]], v{{[0-9]+}}
-define void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
entry:
%a = load <4 x i16>, <4 x i16> addrspace(1)* %ina, align 4
%b = load <4 x i16>, <4 x i16> addrspace(1)* %inb, align 4
@@ -123,7 +123,7 @@ entry:
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL3]], v{{[0-9]+}}
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v[[DST_MUL2]], v{{[0-9]+}}
-define void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
entry:
%a = load <8 x i16>, <8 x i16> addrspace(1)* %ina, align 4
%b = load <8 x i16>, <8 x i16> addrspace(1)* %inb, align 4
@@ -138,7 +138,7 @@ entry:
; SDWA: v_mul_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_f16_sdwa
-define void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_half(half addrspace(1)* %out, half addrspace(1)* %ina, half addrspace(1)* %inb) {
entry:
%a = load half, half addrspace(1)* %ina, align 4
%b = load half, half addrspace(1)* %inb, align 4
@@ -157,7 +157,7 @@ entry:
; SDWA: v_mul_f16_sdwa v[[DST_MUL:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-define void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
entry:
%a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
%b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@@ -178,7 +178,7 @@ entry:
; SDWA: v_mul_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4half(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %ina, <4 x half> addrspace(1)* %inb) {
entry:
%a = load <4 x half>, <4 x half> addrspace(1)* %ina, align 4
%b = load <4 x half>, <4 x half> addrspace(1)* %inb, align 4
@@ -204,7 +204,7 @@ entry:
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8half(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %ina, <8 x half> addrspace(1)* %inb) {
entry:
%a = load <8 x half>, <8 x half> addrspace(1)* %ina, align 4
%b = load <8 x half>, <8 x half> addrspace(1)* %inb, align 4
@@ -219,7 +219,7 @@ entry:
; SDWA: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SDWA-NOT: v_mul_u32_u24_sdwa
-define void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) {
entry:
%a = load i8, i8 addrspace(1)* %ina, align 4
%b = load i8, i8 addrspace(1)* %inb, align 4
@@ -238,7 +238,7 @@ entry:
; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
-define void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) {
entry:
%a = load <2 x i8>, <2 x i8> addrspace(1)* %ina, align 4
%b = load <2 x i8>, <2 x i8> addrspace(1)* %inb, align 4
@@ -259,7 +259,7 @@ entry:
; SDWA: v_mul_u32_u24_sdwa
; SDWA: v_mul_u32_u24_sdwa
-define void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) {
entry:
%a = load <4 x i8>, <4 x i8> addrspace(1)* %ina, align 4
%b = load <4 x i8>, <4 x i8> addrspace(1)* %inb, align 4
@@ -283,7 +283,7 @@ entry:
; SDWA: v_mul_u32_u24_sdwa
; SDWA: v_mul_u32_u24_sdwa
-define void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) {
+define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) {
entry:
%a = load <8 x i8>, <8 x i8> addrspace(1)* %ina, align 4
%b = load <8 x i8>, <8 x i8> addrspace(1)* %inb, align 4
@@ -304,7 +304,7 @@ entry:
; SDWA: v_mac_f16_sdwa v[[DST_MAC:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDWA: v_lshlrev_b32_e32 v[[DST_SHL:[0-9]+]], 16, v[[DST_MAC]]
-define void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
+define amdgpu_kernel void @mac_v2half(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %ina, <2 x half> addrspace(1)* %inb) {
entry:
%a = load <2 x half>, <2 x half> addrspace(1)* %ina, align 4
%b = load <2 x half>, <2 x half> addrspace(1)* %inb, align 4
@@ -318,7 +318,7 @@ entry:
; NOSDWA-NOT: v_mul_u32_u24_sdwa
; SDWA-NOT: v_mul_u32_u24_sdwa
-define void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%mul = mul <2 x i16> %a, <i16 123, i16 321>
@@ -337,7 +337,7 @@ entry:
; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-define void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
+define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
%b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
@@ -353,7 +353,7 @@ entry:
; SDWA-NOT: v_mul_u32_u24_sdwa
; SDWA-NOT: v_add_i32_sdwa
-define void @mul_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb, i1 addrspace(1)* %incond) {
+define amdgpu_kernel void @mul_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb, i1 addrspace(1)* %incond) {
entry:
%a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
%b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
-define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 {
+define amdgpu_kernel void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #2 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -29,7 +29,7 @@ define void @select_fneg_posk_src_rcp_le
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
-define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
+define amdgpu_kernel void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%mul = call float @llvm.amdgcn.fmul.legacy(float %x, float 4.0)
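
Not part of the committed diff: a minimal sketch of the test shape these files now share, with an explicit amdgpu_kernel entry point instead of the default convention. The RUN line, check prefix, and function name below are illustrative assumptions, not taken from this commit.

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}example_kernel:
; GCN: s_endpgm
; Illustrative kernel only; the name and body are not from this commit.
define amdgpu_kernel void @example_kernel(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %v = load float, float addrspace(1)* %in
  store float %v, float addrspace(1)* %out
  ret void
}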
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
-define void @add_select_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -30,7 +30,7 @@ define void @add_select_fabs_fabs_f32(i3
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
-define void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -57,7 +57,7 @@ define void @add_select_multi_use_lhs_fa
; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[X_ABS]]
-define void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -80,7 +80,7 @@ define void @add_select_multi_store_use_
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
-define void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -104,7 +104,7 @@ define void @add_select_multi_use_rhs_fa
; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define void @add_select_fabs_var_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -123,7 +123,7 @@ define void @add_select_fabs_var_f32(i32
; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define void @add_select_fabs_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -140,7 +140,7 @@ define void @add_select_fabs_negk_f32(i3
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
-define void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2.0, float -1.0
@@ -155,7 +155,7 @@ define void @add_select_fabs_negk_negk_f
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
-define void @add_select_posk_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float 2.0, float 1.0
@@ -172,7 +172,7 @@ define void @add_select_posk_posk_f32(i3
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define void @add_select_negk_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -192,7 +192,7 @@ define void @add_select_negk_fabs_f32(i3
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -209,7 +209,7 @@ define void @add_select_negliteralk_fabs
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
-define void @add_select_fabs_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
@@ -228,7 +228,7 @@ define void @add_select_fabs_posk_f32(i3
; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
-define void @add_select_posk_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -246,7 +246,7 @@ define void @add_select_posk_fabs_f32(i3
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define void @add_select_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -268,7 +268,7 @@ define void @add_select_fneg_fneg_f32(i3
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[X]], [[W]]
-define void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -295,7 +295,7 @@ define void @add_select_multi_use_lhs_fn
; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
-define void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -318,7 +318,7 @@ define void @add_select_multi_store_use_
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
; GCN-DAG: v_subrev_f32_e32 v{{[0-9]+}}, [[Y]], [[W]]
-define void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -342,7 +342,7 @@ define void @add_select_multi_use_rhs_fn
; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define void @add_select_fneg_var_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -360,7 +360,7 @@ define void @add_select_fneg_var_f32(i32
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define void @add_select_fneg_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -378,7 +378,7 @@ define void @add_select_fneg_negk_f32(i3
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -398,7 +398,7 @@ define void @add_select_fneg_inv2pi_f32(
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -415,7 +415,7 @@ define void @add_select_fneg_neginv2pi_f
; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
-define void @add_select_negk_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2.0, float -1.0
@@ -432,7 +432,7 @@ define void @add_select_negk_negk_f32(i3
; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
-define void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2048.0, float -4096.0
@@ -446,7 +446,7 @@ define void @add_select_negliteralk_negl
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
-define void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2.0, float -1.0
@@ -463,7 +463,7 @@ define void @add_select_fneg_negk_negk_f
; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define void @add_select_negk_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -480,7 +480,7 @@ define void @add_select_negk_fneg_f32(i3
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define void @add_select_fneg_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -498,7 +498,7 @@ define void @add_select_fneg_posk_f32(i3
; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define void @add_select_posk_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -518,7 +518,7 @@ define void @add_select_posk_fneg_f32(i3
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define void @add_select_negfabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -541,7 +541,7 @@ define void @add_select_negfabs_fabs_f32
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define void @add_select_fabs_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -564,7 +564,7 @@ define void @add_select_fabs_negfabs_f32
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define void @add_select_neg_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -586,7 +586,7 @@ define void @add_select_neg_fabs_f32(i32
; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define void @add_select_fabs_neg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -607,7 +607,7 @@ define void @add_select_fabs_neg_f32(i32
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define void @add_select_neg_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -629,7 +629,7 @@ define void @add_select_neg_negfabs_f32(
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define void @add_select_negfabs_neg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
@@ -651,7 +651,7 @@ define void @add_select_negfabs_neg_f32(
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
-define void @mul_select_negfabs_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -672,7 +672,7 @@ define void @mul_select_negfabs_posk_f32
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
-define void @mul_select_posk_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -690,7 +690,7 @@ define void @mul_select_posk_negfabs_f32
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
-define void @mul_select_negfabs_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -709,7 +709,7 @@ define void @mul_select_negfabs_negk_f32
; GCN: v_cmp_ne_u32_e64 vcc
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
-define void @mul_select_negk_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -732,7 +732,7 @@ define void @mul_select_negk_negfabs_f32
; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define void @select_fneg_posk_src_add_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -749,7 +749,7 @@ define void @select_fneg_posk_src_add_f3
; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%add = fsub float %x, 4.0
@@ -765,7 +765,7 @@ define void @select_fneg_posk_src_sub_f3
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%mul = fmul float %x, 4.0
@@ -782,7 +782,7 @@ define void @select_fneg_posk_src_mul_f3
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -799,7 +799,7 @@ define void @select_fneg_posk_src_fma_f3
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
@@ -818,7 +818,7 @@ define void @select_fneg_posk_src_fmad_f
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
-define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-i1.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-i1.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}select_i1:
; SI: v_cndmask_b32
; SI-NOT: v_cndmask_b32
-define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
+define amdgpu_kernel void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i1 %a, i1 %b
store i1 %sel, i1 addrspace(1)* %out, align 4
@@ -19,7 +19,7 @@ define void @select_i1(i1 addrspace(1)*
; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; SI: v_cmp_eq_u32_e32 vcc, 1, [[COND]]
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
-define void @s_minmax_i1(i1 addrspace(1)* %out, i1 zeroext %cond, i1 zeroext %a, i1 zeroext %b) nounwind {
+define amdgpu_kernel void @s_minmax_i1(i1 addrspace(1)* %out, i1 zeroext %cond, i1 zeroext %a, i1 zeroext %b) nounwind {
%cmp = icmp slt i1 %cond, false
%sel = select i1 %cmp, i1 %a, i1 %b
store i1 %sel, i1 addrspace(1)* %out, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-opt.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%and = and i1 %icmp0, %icmp1
@@ -27,7 +27,7 @@ define void @opt_select_i32_and_cmp_i32(
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%and = and i1 %fcmp0, %fcmp1
@@ -43,7 +43,7 @@ define void @opt_select_i32_and_cmp_f32(
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
-define void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%and = and i1 %icmp0, %icmp1
@@ -59,7 +59,7 @@ define void @opt_select_i64_and_cmp_i32(
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
-define void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%and = and i1 %fcmp0, %fcmp1
@@ -76,7 +76,7 @@ define void @opt_select_i64_and_cmp_f32(
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: s_endpgm
-define void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%or = or i1 %icmp0, %icmp1
@@ -92,7 +92,7 @@ define void @opt_select_i32_or_cmp_i32(i
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%or = or i1 %fcmp0, %fcmp1
@@ -108,7 +108,7 @@ define void @opt_select_i32_or_cmp_f32(i
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
-define void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%or = or i1 %icmp0, %icmp1
@@ -124,7 +124,7 @@ define void @opt_select_i64_or_cmp_i32(i
; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
-define void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%or = or i1 %fcmp0, %fcmp1
@@ -138,7 +138,7 @@ define void @opt_select_i64_or_cmp_f32(i
; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
-define void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
+define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
entry:
%cmp0 = fcmp oeq float %c0, 1.0
br i1 %cmp0, label %if0, label %endif
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
+define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
%cmp = icmp eq i8 %c, 0
%select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4
@@ -22,7 +22,7 @@ define void @select_v4i8(<4 x i8> addrsp
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
+define amdgpu_kernel void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
store <4 x i16> %select, <4 x i16> addrspace(1)* %out, align 4
@@ -36,7 +36,7 @@ define void @select_v4i16(<4 x i16> addr
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
-define void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
+define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
store <2 x i32> %select, <2 x i32> addrspace(1)* %out, align 8
@@ -49,7 +49,7 @@ define void @s_select_v2i32(<2 x i32> ad
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx4
-define void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
+define amdgpu_kernel void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %select, <4 x i32> addrspace(1)* %out, align 16
@@ -64,7 +64,7 @@ define void @s_select_v4i32(<4 x i32> ad
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
-define void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
+define amdgpu_kernel void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
bb:
%tmp2 = icmp ult i32 %cond, 32
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in
@@ -82,7 +82,7 @@ bb:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
+define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %select, <8 x i32> addrspace(1)* %out, align 16
@@ -102,7 +102,7 @@ define void @select_v8i32(<8 x i32> addr
; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
-define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
+define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x float> %a, <2 x float> %b
store <2 x float> %select, <2 x float> addrspace(1)* %out, align 16
@@ -120,7 +120,7 @@ define void @s_select_v2f32(<2 x float>
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx4
-define void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
+define amdgpu_kernel void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x float> %a, <4 x float> %b
store <4 x float> %select, <4 x float> addrspace(1)* %out, align 16
@@ -135,7 +135,7 @@ define void @s_select_v4f32(<4 x float>
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
-define void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
+define amdgpu_kernel void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
bb:
%tmp2 = icmp ult i32 %cond, 32
%val = load <4 x float>, <4 x float> addrspace(1)* %in
@@ -153,7 +153,7 @@ bb:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
+define amdgpu_kernel void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x float> %a, <8 x float> %b
store <8 x float> %select, <8 x float> addrspace(1)* %out, align 16
@@ -165,7 +165,7 @@ define void @select_v8f32(<8 x float> ad
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
+define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x double> %a, <2 x double> %b
store <2 x double> %select, <2 x double> addrspace(1)* %out, align 16
@@ -181,7 +181,7 @@ define void @select_v2f64(<2 x double> a
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
+define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x double> %a, <4 x double> %b
store <4 x double> %select, <4 x double> addrspace(1)* %out, align 16
@@ -205,7 +205,7 @@ define void @select_v4f64(<4 x double> a
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
-define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
+define amdgpu_kernel void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x double> %a, <8 x double> %b
store <8 x double> %select, <8 x double> addrspace(1)* %out, align 16
Modified: llvm/trunk/test/CodeGen/AMDGPU/select.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select.f16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select.f16.ll Tue Mar 21 16:39:51 2017
@@ -17,7 +17,7 @@
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @select_f16(
+define amdgpu_kernel void @select_f16(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
@@ -48,7 +48,7 @@ entry:
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @select_f16_imm_a(
+define amdgpu_kernel void @select_f16_imm_a(
half addrspace(1)* %r,
half addrspace(1)* %b,
half addrspace(1)* %c,
@@ -78,7 +78,7 @@ entry:
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @select_f16_imm_b(
+define amdgpu_kernel void @select_f16_imm_b(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %c,
@@ -109,7 +109,7 @@ entry:
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[C_F16]], v[[D_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @select_f16_imm_c(
+define amdgpu_kernel void @select_f16_imm_c(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
@@ -139,7 +139,7 @@ entry:
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @select_f16_imm_d(
+define amdgpu_kernel void @select_f16_imm_d(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
@@ -168,7 +168,7 @@ entry:
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
-define void @select_v2f16(
+define amdgpu_kernel void @select_v2f16(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
@@ -202,7 +202,7 @@ entry:
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
-define void @select_v2f16_imm_a(
+define amdgpu_kernel void @select_v2f16_imm_a(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %b,
<2 x half> addrspace(1)* %c,
@@ -235,7 +235,7 @@ entry:
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
-define void @select_v2f16_imm_b(
+define amdgpu_kernel void @select_v2f16_imm_b(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %c,
@@ -272,7 +272,7 @@ entry:
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
-define void @select_v2f16_imm_c(
+define amdgpu_kernel void @select_v2f16_imm_c(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
@@ -304,7 +304,7 @@ entry:
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
-define void @select_v2f16_imm_d(
+define amdgpu_kernel void @select_v2f16_imm_d(
<2 x half> addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
Modified: llvm/trunk/test/CodeGen/AMDGPU/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select.ll Tue Mar 21 16:39:51 2017
@@ -14,7 +14,7 @@
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
-define void @select (i32 addrspace(1)* %i32out, float addrspace(1)* %f32out,
+define amdgpu_kernel void @select (i32 addrspace(1)* %i32out, float addrspace(1)* %f32out,
<2 x i32> addrspace(1)* %v2i32out, <2 x float> addrspace(1)* %v2f32out,
<4 x i32> addrspace(1)* %v4i32out, <4 x float> addrspace(1)* %v4f32out,
i32 %cond) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/select64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select64.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; CHECK-NOT: s_lshr_b64
; CHECK: v_cndmask
; CHECK: v_cndmask
-define void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
+define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
entry:
%0 = icmp ugt i32 %cond, 5
%1 = select i1 %0, i64 0, i64 %in
@@ -18,7 +18,7 @@ entry:
; CHECK-LABEL: {{^}}select_trunc_i64:
; CHECK: v_cndmask_b32
; CHECK-NOT: v_cndmask_b32
-define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
+define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i64 0, i64 %in
%trunc = trunc i64 %sel to i32
@@ -29,7 +29,7 @@ define void @select_trunc_i64(i32 addrsp
; CHECK-LABEL: {{^}}select_trunc_i64_2:
; CHECK: v_cndmask_b32
; CHECK-NOT: v_cndmask_b32
-define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i64 %a, i64 %b
%trunc = trunc i64 %sel to i32
@@ -40,7 +40,7 @@ define void @select_trunc_i64_2(i32 addr
; CHECK-LABEL: {{^}}v_select_trunc_i64_2:
; CHECK: v_cndmask_b32
; CHECK-NOT: v_cndmask_b32
-define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
%a = load i64, i64 addrspace(1)* %aptr, align 8
%b = load i64, i64 addrspace(1)* %bptr, align 8
@@ -54,7 +54,7 @@ define void @v_select_trunc_i64_2(i32 ad
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}}
; CHECK: s_endpgm
-define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
%a = load i64, i64 addrspace(1)* %aptr, align 8
%b = load i64, i64 addrspace(1)* %bptr, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnd.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnd.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
;CHECK-NOT: SETE
;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
;CHECK: 1073741824
-define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
+define amdgpu_kernel void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
%1 = load float, float addrspace(1)* %in
%2 = fcmp oeq float %1, 0.0
%3 = select i1 %2, float 1.0, float 2.0
Modified: llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnde-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnde-int.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnde-int.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/selectcc-cnde-int.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
;CHECK-NOT: SETE_INT
;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
;CHECK-NEXT: 2
-define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%1 = load i32, i32 addrspace(1)* %in
%2 = icmp eq i32 %1, 0
%3 = select i1 %2, i32 1, i32 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; CHECK-NEXT: -1
; Test a selectcc with i32 LHS/RHS and float True/False
-define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = load i32, i32 addrspace(1)* %in
%1 = icmp sge i32 %0, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/selectcc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/selectcc-opt.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/selectcc-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/selectcc-opt.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; EG-NOT: CND
; EG: SET{{[NEQGTL]+}}_DX10
-define void @test_a(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @test_a(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 0.000000e+00
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -35,7 +35,7 @@ ENDIF:
; EG: SET{{[GTEQN]+}}_DX10
; EG-NEXT: PRED_
; EG-NEXT: ALU clause starting
-define void @test_b(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 0.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -59,7 +59,7 @@ ENDIF:
; Test a CND*_INT instruction with float true/false values
; EG-LABEL: {{^}}test_c:
; EG: CND{{[GTE]+}}_INT
-define void @test_c(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @test_c(float addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
%1 = select i1 %0, float 2.0, float 3.0
@@ -72,7 +72,7 @@ entry:
; SI-NEXT: v_cndmask_b32_e64
; SI-NOT: cmp
; SI-NOT: cndmask
-define void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = select i1 %icmp0, i32 -1, i32 0
store i32 %ext, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/selectcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/selectcc.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/selectcc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/selectcc.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; SI: v_cmp_eq_u64
; SI: v_cndmask
; SI: v_cndmask
-define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
+define amdgpu_kernel void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
entry:
%0 = icmp eq i64 %lhs, %rhs
%1 = select i1 %0, i64 %true, i64 %false
Modified: llvm/trunk/test/CodeGen/AMDGPU/set-dx10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/set-dx10.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/set-dx10.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/set-dx10.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -22,7 +22,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
@@ -34,7 +34,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -48,7 +48,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
@@ -60,7 +60,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -74,7 +74,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
@@ -86,7 +86,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -100,7 +100,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
@@ -112,7 +112,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -126,7 +126,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
@@ -138,7 +138,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
@@ -152,7 +152,7 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc-equivalent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc-equivalent.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc-equivalent.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc-equivalent.ll Tue Mar 21 16:39:51 2017
@@ -3,7 +3,7 @@
; EG-LABEL: {{^}}and_setcc_setcc_i32:
; EG: AND_INT
; EG-NEXT: SETE_INT
-define void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%cmp1 = icmp eq i32 %a, -1
%cmp2 = icmp eq i32 %b, -1
%and = and i1 %cmp1, %cmp2
@@ -20,7 +20,7 @@ define void @and_setcc_setcc_i32(i32 add
; EG: SETE_INT
; EG: AND_INT
; EG: SETE_INT
-define void @and_setcc_setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+define amdgpu_kernel void @and_setcc_setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
%cmp1 = icmp eq <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%cmp2 = icmp eq <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
%and = and <4 x i1> %cmp1, %cmp2
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc-fneg-constant.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[B]], [[A]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: buffer_store_dword [[MUL]]
-define void @multi_use_fneg_src() #0 {
+define amdgpu_kernel void @multi_use_fneg_src() #0 {
%a = load volatile float, float addrspace(1)* undef
%b = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
@@ -33,7 +33,7 @@ define void @multi_use_fneg_src() #0 {
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[B]], [[A]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[A]]
; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
-define void @multi_foldable_use_fneg_src() #0 {
+define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
%a = load volatile float, float addrspace(1)* undef
%b = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
@@ -59,7 +59,7 @@ define void @multi_foldable_use_fneg_src
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
; GCN-NOT: xor
; GCN: buffer_store_dword [[MUL]]
-define void @multi_use_fneg() #0 {
+define amdgpu_kernel void @multi_use_fneg() #0 {
%a = load volatile float, float addrspace(1)* undef
%b = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
@@ -82,7 +82,7 @@ define void @multi_use_fneg() #0 {
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
; GCN: buffer_store_dword [[MUL1]]
-define void @multi_foldable_use_fneg() #0 {
+define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
%a = load volatile float, float addrspace(1)* undef
%b = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
@@ -101,7 +101,7 @@ define void @multi_foldable_use_fneg() #
; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_oeq_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -114,7 +114,7 @@ define void @test_setcc_fneg_oeq_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_ogt_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -127,7 +127,7 @@ define void @test_setcc_fneg_ogt_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_oge_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -140,7 +140,7 @@ define void @test_setcc_fneg_oge_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_olt_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -153,7 +153,7 @@ define void @test_setcc_fneg_olt_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_ole_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -166,7 +166,7 @@ define void @test_setcc_fneg_ole_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_one_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -179,7 +179,7 @@ define void @test_setcc_fneg_one_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_ueq_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -192,7 +192,7 @@ define void @test_setcc_fneg_ueq_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_ugt_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -205,7 +205,7 @@ define void @test_setcc_fneg_ugt_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_uge_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -218,7 +218,7 @@ define void @test_setcc_fneg_uge_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_ult_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -231,7 +231,7 @@ define void @test_setcc_fneg_ult_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_ule_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
@@ -244,7 +244,7 @@ define void @test_setcc_fneg_ule_posk_f3
; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define void @test_setcc_fneg_une_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
%a = load volatile float, float addrspace(1)* undef
%x = load volatile i32, i32 addrspace(1)* undef
%y = load volatile i32, i32 addrspace(1)* undef
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc-opt.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
-define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, 0
@@ -28,7 +28,7 @@ define void @sext_bool_icmp_eq_0(i1 addr
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
-define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 0
@@ -42,7 +42,7 @@ define void @sext_bool_icmp_ne_0(i1 addr
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, -1
@@ -56,7 +56,7 @@ define void @sext_bool_icmp_eq_neg1(i1 a
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, -1
@@ -70,7 +70,7 @@ define void @sext_bool_icmp_ne_neg1(i1 a
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, 0
@@ -84,7 +84,7 @@ define void @zext_bool_icmp_eq_0(i1 addr
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 0
@@ -98,7 +98,7 @@ define void @zext_bool_icmp_ne_0(i1 addr
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, 1
@@ -111,7 +111,7 @@ define void @zext_bool_icmp_eq_1(i1 addr
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
-define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 1
@@ -124,7 +124,7 @@ define void @zext_bool_icmp_ne_1(i1 addr
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, -1
@@ -137,7 +137,7 @@ define void @zext_bool_icmp_eq_neg1(i1 a
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, -1
@@ -159,7 +159,7 @@ define void @zext_bool_icmp_ne_neg1(i1 a
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
-define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
+define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, 255
store i1 %icmp0, i1 addrspace(1)* %out
@@ -172,7 +172,7 @@ define void @cmp_zext_k_i8max(i1 addrspa
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
-define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
+define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
%b = load i8, i8 addrspace(1)* %b.ptr
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@@ -186,7 +186,7 @@ define void @cmp_sext_k_neg1(i1 addrspac
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
-define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
+define amdgpu_kernel void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
store i1 %icmp0, i1 addrspace(1)* %out
@@ -207,7 +207,7 @@ define void @cmp_sext_k_neg1_i8_sext_arg
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
-define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
+define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
store i1 %icmp0, i1 addrspace(1)* %out
@@ -218,7 +218,7 @@ define void @cmp_sext_k_neg1_i8_arg(i1 a
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
-define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
+define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
store i1 %icmp0, i1 addrspace(1)* %out
@@ -229,7 +229,7 @@ define void @cmp_zext_k_neg1(i1 addrspac
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 2
@@ -241,7 +241,7 @@ define void @zext_bool_icmp_ne_k(i1 addr
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
-define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, 2
@@ -256,7 +256,7 @@ define void @zext_bool_icmp_eq_k(i1 addr
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
-define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp eq i32 %ext, 1
@@ -267,7 +267,7 @@ define void @sext_bool_icmp_eq_1(i1 addr
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
-define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 1
@@ -278,7 +278,7 @@ define void @sext_bool_icmp_ne_1(i1 addr
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
-define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @llvm.r600.read.tidig.x() no
; GCN-DAG: v_cmp_eq_u32_e32
; GCN-DAG: v_cmp_eq_u32_e64
-define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
%result = icmp eq <2 x i32> %a, %b
%sext = sext <2 x i1> %result to <2 x i32>
store <2 x i32> %sext, <2 x i32> addrspace(1)* %out
@@ -26,7 +26,7 @@ define void @setcc_v2i32(<2 x i32> addrs
; GCN: v_cmp_eq_u32_e64
; GCN: v_cmp_eq_u32_e64
; GCN: v_cmp_eq_u32_e64
-define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
@@ -43,7 +43,7 @@ define void @setcc_v4i32(<4 x i32> addrs
; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
; GCN: v_cmp_eq_f32
-define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp oeq float %a, %b
%1 = sext i1 %0 to i32
@@ -54,7 +54,7 @@ entry:
; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
; GCN: v_cmp_gt_f32
-define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ogt float %a, %b
%1 = sext i1 %0 to i32
@@ -65,7 +65,7 @@ entry:
; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
; GCN: v_cmp_ge_f32
-define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp oge float %a, %b
%1 = sext i1 %0 to i32
@@ -76,7 +76,7 @@ entry:
; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
; GCN: v_cmp_lt_f32
-define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp olt float %a, %b
%1 = sext i1 %0 to i32
@@ -87,7 +87,7 @@ entry:
; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
; GCN: v_cmp_le_f32
-define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ole float %a, %b
%1 = sext i1 %0 to i32
@@ -105,7 +105,7 @@ entry:
; GCN: v_cmp_lg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_one(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp one float %a, %b
%1 = sext i1 %0 to i32
@@ -119,7 +119,7 @@ entry:
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
; GCN: v_cmp_o_f32
-define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ord float %a, %b
%1 = sext i1 %0 to i32
@@ -137,7 +137,7 @@ entry:
; GCN: v_cmp_nlg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ueq float %a, %b
%1 = sext i1 %0 to i32
@@ -150,7 +150,7 @@ entry:
; R600: SETE_DX10
; GCN: v_cmp_nle_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ugt float %a, %b
%1 = sext i1 %0 to i32
@@ -164,7 +164,7 @@ entry:
; GCN: v_cmp_nlt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp uge float %a, %b
%1 = sext i1 %0 to i32
@@ -178,7 +178,7 @@ entry:
; GCN: v_cmp_nge_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ult float %a, %b
%1 = sext i1 %0 to i32
@@ -192,7 +192,7 @@ entry:
; GCN: v_cmp_ngt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp ule float %a, %b
%1 = sext i1 %0 to i32
@@ -203,7 +203,7 @@ entry:
; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
; GCN: v_cmp_neq_f32
-define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_une(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp une float %a, %b
%1 = sext i1 %0 to i32
@@ -217,7 +217,7 @@ entry:
; R600: OR_INT
; R600: SETNE_INT
; GCN: v_cmp_u_f32
-define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
%0 = fcmp uno float %a, %b
%1 = sext i1 %0 to i32
@@ -232,7 +232,7 @@ entry:
; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
; GCN: v_cmp_eq_u32
-define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp eq i32 %a, %b
%1 = sext i1 %0 to i32
@@ -243,7 +243,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
; GCN: v_cmp_ne_u32
-define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp ne i32 %a, %b
%1 = sext i1 %0 to i32
@@ -254,7 +254,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
; GCN: v_cmp_gt_u32
-define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp ugt i32 %a, %b
%1 = sext i1 %0 to i32
@@ -265,7 +265,7 @@ entry:
; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
; GCN: v_cmp_ge_u32
-define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp uge i32 %a, %b
%1 = sext i1 %0 to i32
@@ -276,7 +276,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
; GCN: v_cmp_lt_u32
-define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp ult i32 %a, %b
%1 = sext i1 %0 to i32
@@ -287,7 +287,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
; GCN: v_cmp_le_u32
-define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp ule i32 %a, %b
%1 = sext i1 %0 to i32
@@ -298,7 +298,7 @@ entry:
; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
; GCN: v_cmp_gt_i32
-define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp sgt i32 %a, %b
%1 = sext i1 %0 to i32
@@ -309,7 +309,7 @@ entry:
; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
; GCN: v_cmp_ge_i32
-define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp sge i32 %a, %b
%1 = sext i1 %0 to i32
@@ -320,7 +320,7 @@ entry:
; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
; GCN: v_cmp_lt_i32
-define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp slt i32 %a, %b
%1 = sext i1 %0 to i32
@@ -331,7 +331,7 @@ entry:
; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
; GCN: v_cmp_le_i32
-define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
%0 = icmp sle i32 %a, %b
%1 = sext i1 %0 to i32
@@ -348,7 +348,7 @@ entry:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
-define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
+define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
%gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
@@ -369,7 +369,7 @@ define void @v3i32_eq(<3 x i32> addrspac
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
-define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
+define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
%gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
@@ -386,7 +386,7 @@ define void @v3i8_eq(<3 x i8> addrspace(
; FUNC-LABEL: setcc-i1
; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1
; GCN: s_cmp_eq_u32 [[AND]], 0
-define void @setcc-i1(i32 %in) #0 {
+define amdgpu_kernel void @setcc-i1(i32 %in) #0 {
%and = and i32 %in, 1
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %endif, label %if
@@ -400,7 +400,7 @@ endif:
; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_and_b64 s[2:3], [[A]], [[B]]
-define void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
+define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
%tmp5 = fcmp oge float %cond, 0.000000e+00
%tmp7 = fcmp ole float %cond, 1.000000e+00
Modified: llvm/trunk/test/CodeGen/AMDGPU/setcc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setcc64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc64.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN-LABEL: {{^}}f64_oeq:
; GCN: v_cmp_eq_f64
-define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp oeq double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -19,7 +19,7 @@ entry:
; GCN-LABEL: {{^}}f64_ogt:
; GCN: v_cmp_gt_f64
-define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ogt double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -29,7 +29,7 @@ entry:
; GCN-LABEL: {{^}}f64_oge:
; GCN: v_cmp_ge_f64
-define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp oge double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -39,7 +39,7 @@ entry:
; GCN-LABEL: {{^}}f64_olt:
; GCN: v_cmp_lt_f64
-define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp olt double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -49,7 +49,7 @@ entry:
; GCN-LABEL: {{^}}f64_ole:
; GCN: v_cmp_le_f64
-define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ole double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -60,7 +60,7 @@ entry:
; GCN-LABEL: {{^}}f64_one:
; GCN: v_cmp_lg_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_one(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp one double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -70,7 +70,7 @@ entry:
; GCN-LABEL: {{^}}f64_ord:
; GCN: v_cmp_o_f64
-define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ord double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -81,7 +81,7 @@ entry:
; GCN-LABEL: {{^}}f64_ueq:
; GCN: v_cmp_nlg_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ueq double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -93,7 +93,7 @@ entry:
; GCN: v_cmp_nle_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ugt double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -104,7 +104,7 @@ entry:
; GCN-LABEL: {{^}}f64_uge:
; GCN: v_cmp_nlt_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp uge double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -115,7 +115,7 @@ entry:
; GCN-LABEL: {{^}}f64_ult:
; GCN: v_cmp_nge_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ult double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -126,7 +126,7 @@ entry:
; GCN-LABEL: {{^}}f64_ule:
; GCN: v_cmp_ngt_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp ule double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -136,7 +136,7 @@ entry:
; GCN-LABEL: {{^}}f64_une:
; GCN: v_cmp_neq_f64
-define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_une(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp une double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -146,7 +146,7 @@ entry:
; GCN-LABEL: {{^}}f64_uno:
; GCN: v_cmp_u_f64
-define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) #0 {
entry:
%tmp0 = fcmp uno double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -160,7 +160,7 @@ entry:
; GCN-LABEL: {{^}}i64_eq:
; GCN: v_cmp_eq_u64
-define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp eq i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -170,7 +170,7 @@ entry:
; GCN-LABEL: {{^}}i64_ne:
; GCN: v_cmp_ne_u64
-define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp ne i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -180,7 +180,7 @@ entry:
; GCN-LABEL: {{^}}i64_ugt:
; GCN: v_cmp_gt_u64
-define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp ugt i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -190,7 +190,7 @@ entry:
; GCN-LABEL: {{^}}i64_uge:
; GCN: v_cmp_ge_u64
-define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp uge i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -200,7 +200,7 @@ entry:
; GCN-LABEL: {{^}}i64_ult:
; GCN: v_cmp_lt_u64
-define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp ult i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -210,7 +210,7 @@ entry:
; GCN-LABEL: {{^}}i64_ule:
; GCN: v_cmp_le_u64
-define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp ule i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -220,7 +220,7 @@ entry:
; GCN-LABEL: {{^}}i64_sgt:
; GCN: v_cmp_gt_i64
-define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp sgt i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -230,7 +230,7 @@ entry:
; GCN-LABEL: {{^}}i64_sge:
; GCN: v_cmp_ge_i64
-define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp sge i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -240,7 +240,7 @@ entry:
; GCN-LABEL: {{^}}i64_slt:
; GCN: v_cmp_lt_i64
-define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp slt i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -250,7 +250,7 @@ entry:
; GCN-LABEL: {{^}}i64_sle:
; GCN: v_cmp_le_i64
-define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
entry:
%tmp0 = icmp sle i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
Modified: llvm/trunk/test/CodeGen/AMDGPU/sext-eliminate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sext-eliminate.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sext-eliminate.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sext-eliminate.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: SUB_INT {{[* ]*}}[[RES]]
; EG-NOT: BFE
-define void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) {
%sext = sext i1 %a to i32
%res = add i32 %b, %sext
store i32 %res, i32 addrspace(1)* %out
@@ -18,7 +18,7 @@ define void @sext_in_reg_i1_i32_add(i32
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT {{[* ]*}}[[RES]]
; EG-NOT: BFE
-define void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+define amdgpu_kernel void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) {
%sext = sext i1 %a to i32
%res = sub i32 %b, %sext
store i32 %res, i32 addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; EG: LSHR {{\*?}} [[ADDR]]
; Works with the align 2 removed
-define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
%y = ashr <2 x i32> %x, <i32 7, i32 7>
Modified: llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sext-in-reg.ll Tue Mar 21 16:39:51 2017
@@ -15,7 +15,7 @@
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
-define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) #0 {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31
store i32 %sext, i32 addrspace(1)* %out
@@ -32,7 +32,7 @@ define void @sext_in_reg_i1_i32(i32 addr
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 24
%ashr = ashr i32 %shl, 24
@@ -50,7 +50,7 @@ define void @sext_in_reg_i8_to_i32(i32 a
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 16
%ashr = ashr i32 %shl, 16
@@ -68,7 +68,7 @@ define void @sext_in_reg_i16_to_i32(i32
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
%c = add <1 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <1 x i32> %c, <i32 24>
%ashr = ashr <1 x i32> %shl, <i32 24>
@@ -82,7 +82,7 @@ define void @sext_in_reg_i8_to_v1i32(<1
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
%ashr = ashr i64 %shl, 63
@@ -96,7 +96,7 @@ define void @sext_in_reg_i1_to_i64(i64 a
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
%ashr = ashr i64 %shl, 56
@@ -111,7 +111,7 @@ define void @sext_in_reg_i8_to_i64(i64 a
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 48
%ashr = ashr i64 %shl, 48
@@ -125,7 +125,7 @@ define void @sext_in_reg_i16_to_i64(i64
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
%ashr = ashr i64 %shl, 32
@@ -140,7 +140,7 @@ define void @sext_in_reg_i32_to_i64(i64
; XGCN: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
-; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) #0 {
+; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) #0 {
; %c = add <1 x i64> %a, %b
; %shl = shl <1 x i64> %c, <i64 56>
; %ashr = ashr <1 x i64> %shl, <i64 56>
@@ -160,7 +160,7 @@ define void @sext_in_reg_i32_to_i64(i64
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
@@ -187,7 +187,7 @@ define void @v_sext_in_reg_i1_to_i64(i64
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
@@ -214,7 +214,7 @@ define void @v_sext_in_reg_i8_to_i64(i64
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
@@ -238,7 +238,7 @@ define void @v_sext_in_reg_i16_to_i64(i6
; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}}
-define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
@@ -264,7 +264,7 @@ define void @v_sext_in_reg_i32_to_i64(i6
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b
%x = shl i32 %c, 6
%y = ashr i32 %x, 7
@@ -287,7 +287,7 @@ define void @sext_in_reg_i1_in_i32_other
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
%y = ashr <2 x i32> %x, <i32 7, i32 7>
@@ -305,7 +305,7 @@ define void @sext_in_reg_v2i1_in_v2i32_o
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 31, i32 31>
%ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
@@ -326,7 +326,7 @@ define void @sext_in_reg_v2i1_to_v2i32(<
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
%ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
@@ -343,7 +343,7 @@ define void @sext_in_reg_v4i1_to_v4i32(<
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 24, i32 24>
%ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
@@ -364,7 +364,7 @@ define void @sext_in_reg_v2i8_to_v2i32(<
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
%ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
@@ -381,7 +381,7 @@ define void @sext_in_reg_v4i8_to_v4i32(<
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 16, i32 16>
%ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
@@ -390,7 +390,7 @@ define void @sext_in_reg_v2i16_to_v2i32(
}
; FUNC-LABEL: {{^}}testcase:
-define void @testcase(i8 addrspace(1)* %out, i8 %a) #0 {
+define amdgpu_kernel void @testcase(i8 addrspace(1)* %out, i8 %a) #0 {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
%cmp_slt = icmp slt i8 %a, 0
@@ -402,7 +402,7 @@ define void @testcase(i8 addrspace(1)* %
}
; FUNC-LABEL: {{^}}testcase_3:
-define void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 {
+define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
%cmp_slt = icmp slt i8 %a, 0
@@ -418,7 +418,7 @@ define void @testcase_3(i8 addrspace(1)*
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 {
+define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 {
%loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
%loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
@@ -431,7 +431,7 @@ define void @vgpr_sext_in_reg_v4i8_to_v4
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 {
+define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 {
%loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
%loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
@@ -446,7 +446,7 @@ define void @vgpr_sext_in_reg_v4i16_to_v
; GCN: v_max_i32
; GCN-NOT: bfe
; GCN: buffer_store_short
-define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) #0 {
+define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) #0 {
%tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = sext i8 %tmp5 to i32
%tmp2.5 = icmp sgt i32 %tmp2, 0
@@ -462,7 +462,7 @@ declare i32 @llvm.AMDGPU.bfe.i32(i32, i3
; FUNC-LABEL: {{^}}bfe_0_width:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
%load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -473,7 +473,7 @@ define void @bfe_0_width(i32 addrspace(1
; GCN: v_bfe_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
%load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
@@ -484,7 +484,7 @@ define void @bfe_8_bfe_8(i32 addrspace(1
; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN: s_endpgm
-define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
%load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
@@ -497,7 +497,7 @@ define void @bfe_8_bfe_16(i32 addrspace(
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
%load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
@@ -510,7 +510,7 @@ define void @bfe_16_bfe_8(i32 addrspace(
; GCN: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
%shl = shl i32 %bfe, 24
@@ -520,7 +520,7 @@ define void @sext_in_reg_i8_to_i32_bfe(i
}
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
-define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
%shl = shl i32 %bfe, 24
@@ -533,7 +533,7 @@ define void @sext_in_reg_i8_to_i32_bfe_w
; GCN: buffer_load_sbyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
%load = load i8, i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
@@ -547,7 +547,7 @@ define void @sextload_i8_to_i32_bfe(i32
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
%load = load i8, i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
@@ -562,7 +562,7 @@ define void @sextload_i8_to_i32_bfe_0(i3
; GCN-NOT: shl
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
-define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = ashr i32 %shl, 31
@@ -577,7 +577,7 @@ define void @sext_in_reg_i1_bfe_offset_0
; GCN-NOT: shr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; GCN: s_endpgm
-define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
@@ -593,7 +593,7 @@ define void @sext_in_reg_i1_bfe_offset_1
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; GCN: s_endpgm
-define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
@@ -617,7 +617,7 @@ define void @sext_in_reg_i2_bfe_offset_1
; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
-define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
@@ -647,7 +647,7 @@ define void @v_sext_in_reg_i1_to_i64_mov
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
-define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
@@ -673,7 +673,7 @@ define void @v_sext_in_reg_i32_to_i64_mo
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
-define void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
%ld = load i32, i32 addrspace(2)* %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 15
@@ -692,7 +692,7 @@ define void @s_sext_in_reg_i1_i16(i16 ad
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
-define void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
%ld = load i32, i32 addrspace(2)* %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 14
@@ -706,7 +706,7 @@ define void @s_sext_in_reg_i2_i16(i16 ad
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
-define void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@@ -727,7 +727,7 @@ define void @v_sext_in_reg_i1_i16(i16 ad
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
-define void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
+define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
@@ -753,7 +753,7 @@ define void @v_sext_in_reg_i1_i16_nonloa
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
-define void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
%shl = shl i16 %in, 14
%sext = ashr i16 %shl, 14
store i16 %sext, i16 addrspace(1)* %out
@@ -770,7 +770,7 @@ define void @s_sext_in_reg_i2_i16_arg(i1
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
-define void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
%shl = shl i16 %in, 8
%sext = ashr i16 %shl, 8
store i16 %sext, i16 addrspace(1)* %out
@@ -787,7 +787,7 @@ define void @s_sext_in_reg_i8_i16_arg(i1
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
-define void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
%shl = shl i16 %in, 1
%sext = ashr i16 %shl, 1
store i16 %sext, i16 addrspace(1)* %out
@@ -798,7 +798,7 @@ define void @s_sext_in_reg_i15_i16_arg(i
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 15, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 15, [[SHL]]
-define void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 15, i16 15>
%ashr = ashr <2 x i16> %shl, <i16 15, i16 15>
@@ -813,7 +813,7 @@ define void @sext_in_reg_v2i1_to_v2i16(<
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 15, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}}
-define void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 {
%c = add <3 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <3 x i16> %c, <i16 15, i16 15, i16 15>
%ashr = ashr <3 x i16> %shl, <i16 15, i16 15, i16 15>
@@ -825,7 +825,7 @@ define void @sext_in_reg_v3i1_to_v3i16(<
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 14, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 14, [[SHL]]
-define void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 14, i16 14>
%ashr = ashr <2 x i16> %shl, <i16 14, i16 14>
@@ -837,7 +837,7 @@ define void @sext_in_reg_v2i2_to_v2i16(<
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 8, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 8, [[SHL]]
-define void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 8, i16 8>
%ashr = ashr <2 x i16> %shl, <i16 8, i16 8>
@@ -852,7 +852,7 @@ define void @sext_in_reg_v2i8_to_v2i16(<
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
-define void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 {
%c = add <3 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <3 x i16> %c, <i16 8, i16 8, i16 8>
%ashr = ashr <3 x i16> %shl, <i16 8, i16 8, i16 8>
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; SI: s_sub
-define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
%0 = icmp eq i32 %a, 0
br i1 %0, label %if, label %else
@@ -52,7 +52,7 @@ endif:
; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
; SI: buffer_store_dword
; SI-NEXT: s_endpgm
-define void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
%0 = icmp eq i32 %a, 0
br i1 %0, label %if, label %else
@@ -79,7 +79,7 @@ endif:
; SI: s_add_i32 [[SGPR:s[0-9]+]]
; SI-NOT: s_add_i32 [[SGPR]]
-define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%tid_f = uitofp i32 %tid to float
@@ -116,7 +116,7 @@ endif:
; SI: v_cmp_ne_u32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
; SI: buffer_store_dword [[RESULT]]
-define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp1 = icmp eq i32 %tid, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; SI-LABEL: {{^}}test_dup_operands:
; SI: v_add_i32_e32
-define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
+define amdgpu_kernel void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%lo = extractelement <2 x i32> %a, i32 0
%hi = extractelement <2 x i32> %a, i32 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Tue Mar 21 16:39:51 2017
@@ -265,7 +265,7 @@ endif:
; CHECK: buffer_load_dword
; CHECK: v_add
; CHECK: s_endpgm
-define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
+define amdgpu_kernel void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
entry:
%tmp = load float, float addrspace(1)* %in0
%tmp1 = fcmp oeq float %tmp, 0.000000e+00
Modified: llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
-define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
@@ -33,7 +33,7 @@ define void @v_uextract_bit_31_i128(i128
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
-define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
@@ -55,7 +55,7 @@ define void @v_uextract_bit_63_i128(i128
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
-define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
@@ -77,7 +77,7 @@ define void @v_uextract_bit_95_i128(i128
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
-define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
@@ -100,7 +100,7 @@ define void @v_uextract_bit_127_i128(i12
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
-define void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
Modified: llvm/trunk/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -25,7 +25,7 @@ define void @v_uextract_bit_31_i64(i64 a
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -41,7 +41,7 @@ define void @v_uextract_bit_63_i64(i64 a
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -57,7 +57,7 @@ define void @v_uextract_bit_1_i64(i64 ad
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -73,7 +73,7 @@ define void @v_uextract_bit_20_i64(i64 a
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -89,7 +89,7 @@ define void @v_uextract_bit_32_i64(i64 a
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -105,7 +105,7 @@ define void @v_uextract_bit_33_i64(i64 a
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -121,7 +121,7 @@ define void @v_uextract_bit_20_21_i64(i6
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -137,7 +137,7 @@ define void @v_uextract_bit_1_30_i64(i64
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -155,7 +155,7 @@ define void @v_uextract_bit_1_31_i64(i64
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -171,7 +171,7 @@ define void @v_uextract_bit_31_32_i64(i6
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -188,7 +188,7 @@ define void @v_uextract_bit_32_33_i64(i6
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -204,7 +204,7 @@ define void @v_uextract_bit_30_60_i64(i6
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[BFE:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -220,7 +220,7 @@ define void @v_uextract_bit_33_63_i64(i6
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
-define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -236,7 +236,7 @@ define void @v_uextract_bit_31_63_i64(i6
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
-define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
@@ -252,7 +252,7 @@ define void @v_uextract_bit_31_i64_trunc
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
@@ -268,7 +268,7 @@ define void @v_uextract_bit_3_i64_trunc_
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
@@ -286,7 +286,7 @@ define void @v_uextract_bit_33_i64_trunc
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
-define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
@@ -306,7 +306,7 @@ define void @v_uextract_bit_31_32_i64_tr
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
-define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -327,7 +327,7 @@ define void @and_not_mask_i64(i64 addrsp
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
-define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -347,7 +347,7 @@ define void @v_uextract_bit_27_29_multi_
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO_SHR]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO_BFE]]{{\]}}
-define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
@@ -365,7 +365,7 @@ define void @v_uextract_bit_34_37_multi_
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
; GCN: buffer_store_dword v[[ZERO]]
-define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
%out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
Modified: llvm/trunk/test/CodeGen/AMDGPU/shift-i64-opts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shift-i64-opts.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shift-i64-opts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shift-i64-opts.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 35
store i64 %shl, i64 addrspace(1)* %out
@@ -20,7 +20,7 @@ define void @lshr_i64_35(i64 addrspace(1
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 63
store i64 %shl, i64 addrspace(1)* %out
@@ -32,7 +32,7 @@ define void @lshr_i64_63(i64 addrspace(1
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 33
store i64 %shl, i64 addrspace(1)* %out
@@ -43,7 +43,7 @@ define void @lshr_i64_33(i64 addrspace(1
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = lshr i64 %val, 32
store i64 %shl, i64 addrspace(1)* %out
@@ -58,7 +58,7 @@ define void @lshr_i64_32(i64 addrspace(1
; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[HI]], 8, 23
; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
-define void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%and = and i64 %val, 9223372036854775807 ; 0x7fffffffffffffff
%shl = lshr i64 %and, 40
@@ -73,7 +73,7 @@ define void @lshr_and_i64_35(i64 addrspa
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 3, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 35
store i64 %shl, i64 addrspace(1)* %out
@@ -84,7 +84,7 @@ define void @shl_i64_const_35(i64 addrsp
; GCN-DAG: buffer_load_dword v[[HI:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 32
store i64 %shl, i64 addrspace(1)* %out
@@ -96,7 +96,7 @@ define void @shl_i64_const_32(i64 addrsp
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 31, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 63
store i64 %shl, i64 addrspace(1)* %out
@@ -106,7 +106,7 @@ define void @shl_i64_const_63(i64 addrsp
; ashr (i64 x), 63 => (ashr lo(x), 31), lo(x)
; GCN-LABEL: {{^}}ashr_i64_const_32:
-define void @ashr_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = ashr i64 %val, 32
store i64 %shl, i64 addrspace(1)* %out
@@ -114,7 +114,7 @@ define void @ashr_i64_const_32(i64 addrs
}
; GCN-LABEL: {{^}}ashr_i64_const_63:
-define void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = ashr i64 %val, 63
store i64 %shl, i64 addrspace(1)* %out
@@ -125,7 +125,7 @@ define void @ashr_i64_const_63(i64 addrs
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
@@ -137,7 +137,7 @@ define void @trunc_shl_31_i32_i64(i32 ad
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
; GCN: buffer_store_short [[SHL]]
-define void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 15
%trunc = trunc i64 %shl to i16
@@ -149,7 +149,7 @@ define void @trunc_shl_15_i16_i64(i16 ad
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
; GCN: buffer_store_short [[SHL]]
-define void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addrspace(1)* %in) {
%val = load i32, i32 addrspace(1)* %in
%shl = shl i32 %val, 15
%trunc = trunc i32 %shl to i16
@@ -161,7 +161,7 @@ define void @trunc_shl_15_i16_i32(i16 ad
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 7, [[VAL]]
; GCN: buffer_store_byte [[SHL]]
-define void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 7
%trunc = trunc i64 %shl to i8
@@ -174,7 +174,7 @@ define void @trunc_shl_7_i8_i64(i8 addrs
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 2, [[SHL]]
; GCN: buffer_store_byte [[AND]]
-define void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 1
%trunc = trunc i64 %shl to i2
@@ -186,7 +186,7 @@ define void @trunc_shl_1_i2_i64(i2 addrs
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 1
%trunc = trunc i64 %shl to i32
@@ -198,7 +198,7 @@ define void @trunc_shl_1_i32_i64(i32 add
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define void @trunc_shl_16_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_16_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 16
%trunc = trunc i64 %shl to i32
@@ -209,7 +209,7 @@ define void @trunc_shl_16_i32_i64(i32 ad
; GCN-LABEL: {{^}}trunc_shl_33_i32_i64:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[ZERO]]
-define void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 33
%trunc = trunc i64 %shl to i32
@@ -222,7 +222,7 @@ define void @trunc_shl_33_i32_i64(i32 ad
; GCN-DAG: v_lshlrev_b32_e32 v[[RESHI:[0-9]+]], 16, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 v[[RESLO:[0-9]+]], 16, v[[LO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
-define void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in
%shl = shl <2 x i64> %val, <i64 16, i64 16>
%trunc = trunc <2 x i64> %shl to <2 x i32>
@@ -235,7 +235,7 @@ define void @trunc_shl_16_v2i32_v2i64(<2
; GCN: v_lshl_b64 v{{\[}}[[RESLO:[0-9]+]]:[[RESHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN: buffer_store_dword v[[RESLO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
-define void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%val = load i64, i64 addrspace(1)* %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl.ll Tue Mar 21 16:39:51 2017
@@ -17,7 +17,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
@@ -44,7 +44,7 @@ define void @shl_v2i32(<2 x i32> addrspa
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
@@ -57,7 +57,7 @@ define void @shl_v4i32(<4 x i32> addrspa
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
%a = load i16, i16 addrspace(1)* %in
%b = load i16, i16 addrspace(1)* %b_ptr
@@ -70,7 +70,7 @@ define void @shl_i16(i16 addrspace(1)* %
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
; VI: v_lshlrev_b16_e64 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-define void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
+define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
%a = load i16, i16 addrspace(1)* %in
%result = shl i16 %a, %b
store i16 %result, i16 addrspace(1)* %out
@@ -81,7 +81,7 @@ define void @shl_i16_v_s(i16 addrspace(1
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e64 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-define void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
+define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
%a = load i16, i16 addrspace(1)* %in
%b.add = add i16 %b, 3
%result = shl i16 %a, %b.add
@@ -92,7 +92,7 @@ define void @shl_i16_v_compute_s(i16 add
; GCN-LABEL: {{^}}shl_i16_computed_amount:
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 3, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, [[ADD]], v{{[0-9]+}}
-define void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
@@ -107,7 +107,7 @@ define void @shl_i16_computed_amount(i16
; GCN-LABEL: {{^}}shl_i16_i_s:
; GCN: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 12
-define void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
+define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
%result = shl i16 %a, 12
store i16 %result, i16 addrspace(1)* %out
ret void
@@ -116,7 +116,7 @@ define void @shl_i16_i_s(i16 addrspace(1
; GCN-LABEL: {{^}}shl_v2i16:
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -133,7 +133,7 @@ define void @shl_v2i16(<2 x i16> addrspa
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
@@ -160,7 +160,7 @@ define void @shl_v4i16(<4 x i16> addrspa
; GCN-LABEL: {{^}}shl_i64:
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
%a = load i64, i64 addrspace(1)* %in
%b = load i64, i64 addrspace(1)* %b_ptr
@@ -199,7 +199,7 @@ define void @shl_i64(i64 addrspace(1)* %
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
%a = load <2 x i64>, <2 x i64> addrspace(1)* %in
%b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
@@ -262,7 +262,7 @@ define void @shl_v2i64(<2 x i64> addrspa
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
-define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
%a = load <4 x i64>, <4 x i64> addrspace(1)* %in
%b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
@@ -277,7 +277,7 @@ define void @shl_v4i64(<4 x i64> addrspa
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[LO_A]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) {
%result = shl i64 %a, 32
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -287,7 +287,7 @@ define void @s_shl_32_i64(i64 addrspace(
; GCN-DAG: buffer_load_dword v[[LO_A:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[LO_A]]{{\]}}
-define void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -299,7 +299,7 @@ define void @v_shl_32_i64(i64 addrspace(
; FUNC-LABEL: {{^}}s_shl_constant_i64
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-define void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%shl = shl i64 281474976710655, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -311,7 +311,7 @@ define void @s_shl_constant_i64(i64 addr
; SI-DAG: s_movk_i32 s[[KHI:[0-9]+]], 0x11e{{$}}
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
; SI: buffer_store_dwordx2
-define void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%shl = shl i64 1231231234567, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
@@ -323,7 +323,7 @@ define void @v_shl_constant_i64(i64 addr
; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x12d687{{$}}
; SI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0{{$}}
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
-define void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%shl = shl i64 1234567, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
@@ -332,7 +332,7 @@ define void @v_shl_i64_32_bit_constant(i
; FUNC-LABEL: {{^}}v_shl_inline_imm_64_i64:
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, 64, {{v[0-9]+}}
-define void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
%shl = shl i64 64, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
@@ -341,7 +341,7 @@ define void @v_shl_inline_imm_64_i64(i64
; FUNC-LABEL: {{^}}s_shl_inline_imm_64_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 64, s{{[0-9]+}}
-define void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 64, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -349,7 +349,7 @@ define void @s_shl_inline_imm_64_i64(i64
; FUNC-LABEL: {{^}}s_shl_inline_imm_1_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1, s{{[0-9]+}}
-define void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 1, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -357,7 +357,7 @@ define void @s_shl_inline_imm_1_i64(i64
; FUNC-LABEL: {{^}}s_shl_inline_imm_1.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
-define void @s_shl_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 4607182418800017408, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -365,7 +365,7 @@ define void @s_shl_inline_imm_1.0_i64(i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_1.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}}
-define void @s_shl_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 13830554455654793216, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -373,7 +373,7 @@ define void @s_shl_inline_imm_neg_1.0_i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_0.5_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0.5, s{{[0-9]+}}
-define void @s_shl_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 4602678819172646912, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -381,7 +381,7 @@ define void @s_shl_inline_imm_0.5_i64(i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_0.5_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -0.5, s{{[0-9]+}}
-define void @s_shl_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 13826050856027422720, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -389,7 +389,7 @@ define void @s_shl_inline_imm_neg_0.5_i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_2.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 2.0, s{{[0-9]+}}
-define void @s_shl_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 4611686018427387904, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -397,7 +397,7 @@ define void @s_shl_inline_imm_2.0_i64(i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_2.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -2.0, s{{[0-9]+}}
-define void @s_shl_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 13835058055282163712, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -405,7 +405,7 @@ define void @s_shl_inline_imm_neg_2.0_i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_4.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 4.0, s{{[0-9]+}}
-define void @s_shl_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 4616189618054758400, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -413,7 +413,7 @@ define void @s_shl_inline_imm_4.0_i64(i6
; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_4.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -4.0, s{{[0-9]+}}
-define void @s_shl_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 13839561654909534208, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -427,7 +427,7 @@ define void @s_shl_inline_imm_neg_4.0_i6
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
-define void @s_shl_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 1082130432, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -439,7 +439,7 @@ define void @s_shl_inline_imm_f32_4.0_i6
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}, s{{[0-9]+}}
-define void @s_shl_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 -1065353216, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -450,7 +450,7 @@ define void @s_shl_inline_imm_f32_neg_4.
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
-define void @s_shl_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 4647714815446351872, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -460,7 +460,7 @@ define void @s_shl_inline_high_imm_f32_4
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
-define void @s_shl_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%shl = shl i64 13871086852301127680, %a
store i64 %shl, i64 addrspace(1)* %out, align 8
ret void
@@ -468,7 +468,7 @@ define void @s_shl_inline_high_imm_f32_n
; FUNC-LABEL: {{^}}test_mul2:
; GCN: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
-define void @test_mul2(i32 %p) {
+define amdgpu_kernel void @test_mul2(i32 %p) {
%i = mul i32 %p, 2
store volatile i32 %i, i32 addrspace(1)* undef
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl.v2i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl.v2i16.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; CIVI: v_lshlrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CIVI: v_or_b32_e32
-define void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
%result = shl <2 x i16> %lhs, %rhs
store <2 x i16> %result, <2 x i16> addrspace(1)* %out
ret void
@@ -38,7 +38,7 @@ define void @s_shl_v2i16(<2 x i16> addrs
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -55,7 +55,7 @@ define void @v_shl_v2i16(<2 x i16> addrs
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -70,7 +70,7 @@ define void @shl_v_s_v2i16(<2 x i16> add
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -84,7 +84,7 @@ define void @shl_s_v_v2i16(<2 x i16> add
; GCN-LABEL: {{^}}shl_imm_v_v2i16:
; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], 8
-define void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -98,7 +98,7 @@ define void @shl_imm_v_v2i16(<2 x i16> a
; GCN-LABEL: {{^}}shl_v_imm_v2i16:
; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], 8, [[LHS]]
-define void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -115,7 +115,7 @@ define void @shl_v_imm_v2i16(<2 x i16> a
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dwordx2
-define void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
@@ -133,7 +133,7 @@ define void @v_shl_v4i16(<4 x i16> addrs
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dwordx2
-define void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
%val = load i32, i32 addrspace(1)* %ptr, align 4
@@ -25,7 +25,7 @@ define void @shl_2_add_9_i32(i32 addrspa
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
; SI: s_endpgm
-define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
%val = load i32, i32 addrspace(1)* %ptr, align 4
@@ -43,7 +43,7 @@ define void @shl_2_add_9_i32_2_add_uses(
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
%val = load i32, i32 addrspace(1)* %ptr, align 4
@@ -61,7 +61,7 @@ define void @shl_2_add_999_i32(i32 addrs
; SI: s_addk_i32 [[RESULT]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
-define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %shl, %y
@@ -78,7 +78,7 @@ define void @test_add_shl_add_constant(i
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
-define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %y, %shl
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll Tue Mar 21 16:39:51 2017
@@ -19,7 +19,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
; SI: s_endpgm
-define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
@@ -39,7 +39,7 @@ define void @load_shl_base_lds_0(float a
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
; SI: s_endpgm
-define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
@@ -55,7 +55,7 @@ define void @load_shl_base_lds_1(float a
; SI-LABEL: {{^}}load_shl_base_lds_max_offset
; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; SI: s_endpgm
-define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
%arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
@@ -73,7 +73,7 @@ define void @load_shl_base_lds_max_offse
; SI: s_mov_b32 m0, -1
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
; SI: s_endpgm
-define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
@@ -89,7 +89,7 @@ define void @load_shl_base_lds_2(float a
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
@@ -104,7 +104,7 @@ define void @store_shl_base_lds_0(float
@lds2 = addrspace(3) global [512 x i32] undef, align 4
-; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; define amdgpu_kernel void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -119,7 +119,7 @@ define void @store_shl_base_lds_0(float
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
+define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -134,7 +134,7 @@ define void @atomic_cmpxchg_shl_base_lds
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -148,7 +148,7 @@ define void @atomic_swap_shl_base_lds_0(
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -162,7 +162,7 @@ define void @atomic_add_shl_base_lds_0(i
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -176,7 +176,7 @@ define void @atomic_sub_shl_base_lds_0(i
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -190,7 +190,7 @@ define void @atomic_and_shl_base_lds_0(i
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -204,7 +204,7 @@ define void @atomic_or_shl_base_lds_0(i3
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -214,7 +214,7 @@ define void @atomic_xor_shl_base_lds_0(i
ret void
}
-; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -228,7 +228,7 @@ define void @atomic_xor_shl_base_lds_0(i
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -242,7 +242,7 @@ define void @atomic_min_shl_base_lds_0(i
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -256,7 +256,7 @@ define void @atomic_max_shl_base_lds_0(i
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
@@ -270,7 +270,7 @@ define void @atomic_umin_shl_base_lds_0(
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
-define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
-define void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -23,7 +23,7 @@ define void @v_test_i32_x_sub_64(i32 add
; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
-define void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -40,7 +40,7 @@ define void @v_test_i32_x_sub_64_multi_u
; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
-define void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -54,7 +54,7 @@ define void @v_test_i32_64_sub_x(i32 add
; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
-define void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -68,7 +68,7 @@ define void @v_test_i32_x_sub_65(i32 add
; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
-define void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -82,7 +82,7 @@ define void @v_test_i32_65_sub_x(i32 add
; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
-define void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -96,7 +96,7 @@ define void @v_test_i32_x_sub_neg16(i32
; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
-define void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -110,7 +110,7 @@ define void @v_test_i32_neg16_sub_x(i32
; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
-define void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -124,7 +124,7 @@ define void @v_test_i32_x_sub_neg17(i32
; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
-define void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -138,7 +138,7 @@ define void @v_test_i32_neg17_sub_x(i32
; GCN-LABEL: {{^}}s_test_i32_x_sub_64:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: s_sub_i32 s{{[0-9]+}}, [[X]], 64
-define void @s_test_i32_x_sub_64(i32 %x) #0 {
+define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
%result = sub i32 %x, 64
call void asm sideeffect "; use $0", "s"(i32 %result)
ret void
@@ -147,7 +147,7 @@ define void @s_test_i32_x_sub_64(i32 %x)
; GCN-LABEL: {{^}}v_test_i16_x_sub_64:
; VI: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
; VI: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
-define void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
@@ -166,7 +166,7 @@ define void @v_test_i16_x_sub_64(i16 add
; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
-define void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
# resume crashes
--- |
- define void @shrink_add_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ define amdgpu_kernel void @shrink_add_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -20,7 +20,7 @@
ret void
}
- define void @shrink_sub_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ define amdgpu_kernel void @shrink_sub_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -33,7 +33,7 @@
ret void
}
- define void @shrink_subrev_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ define amdgpu_kernel void @shrink_subrev_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -46,7 +46,7 @@
ret void
}
- define void @check_addc_src2_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ define amdgpu_kernel void @check_addc_src2_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -59,7 +59,7 @@
ret void
}
- define void @shrink_addc_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ define amdgpu_kernel void @shrink_addc_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -72,7 +72,7 @@
ret void
}
- define void @shrink_addc_undef_vcc(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ define amdgpu_kernel void @shrink_addc_undef_vcc(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; GCN: s_cbranch_vccnz
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
-define void @annotate_unreachable_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_unreachable_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
@@ -40,7 +40,7 @@ bb5:
; GCN: s_cbranch_scc1
; GCN: s_endpgm
; GCN: .Lfunc_end1
-define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; GCN: s_and_saveexec_b64
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
-define void @annotate_unreachable(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_unreachable(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@
; SI: s_andn2_b64
; s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
-define void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
main_body:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%0 = and i32 %a, %tid
@@ -40,7 +40,7 @@ ENDIF:
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
-define void @phi_cond_outside_loop(i32 %b) {
+define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
entry:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%0 = icmp eq i32 %tid , 0
@@ -68,7 +68,7 @@ exit:
; CHECK-LABEL: {{^}}switch_unreachable:
; CHECK-NOT: s_endpgm
; CHECK: .Lfunc_end2
-define void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
+define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
centry:
switch i32 %x, label %sw.default [
i32 0, label %sw.bb
@@ -100,7 +100,7 @@ declare float @llvm.fabs.f32(float) noun
; SI: [[ENDPGM]]:
; SI: s_endpgm
-define void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
+define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
%cmp = icmp sgt i32 %c0, 0
br label %while.cond.outer
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; CHECK s_or_b64 exec, exec
; CHECK s_andn2_b64 exec, exec
; CHECK s_cbranch_execnz
-define void @test(i32 %arg, i32 %arg1) {
+define amdgpu_kernel void @test(i32 %arg, i32 %arg1) {
bb:
%tmp = icmp ne i32 %arg, 0
%tmp7 = icmp ne i32 %arg1, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir Tue Mar 21 16:39:51 2017
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN
--- |
- define void @phi_visit_order() { ret void }
+ define amdgpu_kernel void @phi_visit_order() { ret void }
name: phi_visit_order
tracksRegLiveness: true
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
-define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load volatile i32, i32 addrspace(1)* %in
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll Tue Mar 21 16:39:51 2017
@@ -14,7 +14,7 @@
; GCN: [[UNREACHABLE]]:
; GCN: ds_write_b32
; GCN: s_waitcnt
-define void @lower_control_flow_unreachable_terminator() #0 {
+define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
bb:
%tmp15 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp63 = icmp eq i32 %tmp15, 32
@@ -41,7 +41,7 @@ ret:
; GCN-NEXT: s_or_b64 exec, exec
; GCN: ds_write_b32
; GCN: s_waitcnt
-define void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
+define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
bb:
%tmp15 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp63 = icmp eq i32 %tmp15, 32
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll Tue Mar 21 16:39:51 2017
@@ -24,7 +24,7 @@
; SMEM: s_dcache_wb
; ALL: s_endpgm
-define void @test(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; FUNC-LABEL: @reorder_local_load_global_store_local_load
; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:3
; CI: buffer_store_dword
-define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+define amdgpu_kernel void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
@@ -33,7 +33,7 @@ define void @reorder_local_load_global_s
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
-define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
@@ -53,7 +53,7 @@ define void @no_reorder_local_load_volat
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: buffer_store_dword
-define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
@@ -77,7 +77,7 @@ define void @no_reorder_barrier_local_lo
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3
; CI: buffer_store_dword
-define void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
@@ -100,7 +100,7 @@ define void @reorder_constant_load_globa
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3
; CI: ds_write_b32
; CI: buffer_store_dword
-define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
+define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
%ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
@@ -122,7 +122,7 @@ define void @reorder_constant_load_local
; CI: s_load_dword
; CI: ds_write_b32
; CI: buffer_store_dword
-define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
+define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
@@ -141,7 +141,7 @@ define void @reorder_smrd_load_local_sto
; CI: buffer_load_dword
; CI: buffer_load_dword
; CI: buffer_store_dword
-define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
+define amdgpu_kernel void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 3
@@ -161,7 +161,7 @@ define void @reorder_global_load_local_s
; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
; CI: buffer_store_dword
; CI: s_endpgm
-define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
+define amdgpu_kernel void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3
%ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 100
%ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 102
@@ -187,7 +187,7 @@ define void @reorder_local_offsets(i32 a
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
; CI: buffer_store_dword
; CI: s_endpgm
-define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
+define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
%ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3
%ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 100
%ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 102
@@ -221,7 +221,7 @@ define void @reorder_global_offsets(i32
; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:36{{$}}
; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:52{{$}}
-define void @reorder_global_offsets_addr64_soffset0(i32 addrspace(1)* noalias nocapture %ptr.base) #0 {
+define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(i32 addrspace(1)* noalias nocapture %ptr.base) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-vector-hang.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-vector-hang.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-vector-hang.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-vector-hang.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; CHECK: buffer_store_byte
; ModuleID = 'radeon'
-define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
+define amdgpu_kernel void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
entry:
%0 = load i8, i8 addrspace(1)* %in0, align 1
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/sign_extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sign_extend.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sign_extend.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sign_extend.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}s_sext_i1_to_i32:
; GCN: v_cndmask_b32_e64
; GCN: s_endpgm
-define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -14,7 +14,7 @@ define void @s_sext_i1_to_i32(i32 addrsp
; GCN-LABEL: {{^}}test_s_sext_i32_to_i64:
; GCN: s_ashr_i32
; GCN: s_endpg
-define void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+define amdgpu_kernel void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
entry:
%mul = mul i32 %a, %b
%add = add i32 %mul, %c
@@ -28,7 +28,7 @@ entry:
; GCN: v_mov_b32_e32 v[[HIREG:[0-9]+]], v[[LOREG]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LOREG]]:[[HIREG]]{{\]}}
; GCN: s_endpgm
-define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
@@ -38,7 +38,7 @@ define void @s_sext_i1_to_i64(i64 addrsp
; GCN-LABEL: {{^}}s_sext_i32_to_i64:
; GCN: s_ashr_i32
; GCN: s_endpgm
-define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
+define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
%sext = sext i32 %a to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
ret void
@@ -47,7 +47,7 @@ define void @s_sext_i32_to_i64(i64 addrs
; GCN-LABEL: {{^}}v_sext_i32_to_i64:
; GCN: v_ashr
; GCN: s_endpgm
-define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32, i32 addrspace(1)* %in, align 4
%sext = sext i32 %val to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
@@ -56,7 +56,7 @@ define void @v_sext_i32_to_i64(i64 addrs
; GCN-LABEL: {{^}}s_sext_i16_to_i64:
; GCN: s_bfe_i64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100000
-define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
+define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
%sext = sext i16 %a to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
ret void
@@ -65,7 +65,7 @@ define void @s_sext_i16_to_i64(i64 addrs
; GCN-LABEL: {{^}}s_sext_i1_to_i16:
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
; GCN-NEXT: buffer_store_short [[RESULT]]
-define void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i16
store i16 %sext, i16 addrspace(1)* %out
@@ -79,7 +79,7 @@ define void @s_sext_i1_to_i16(i16 addrsp
; GCN-LABEL: {{^}}s_sext_i1_to_i16_with_and:
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
; GCN-NEXT: buffer_store_short [[RESULT]]
-define void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
%cmp0 = icmp eq i32 %a, %b
%cmp1 = icmp eq i32 %c, %d
%cmp = and i1 %cmp0, %cmp1
@@ -91,7 +91,7 @@ define void @s_sext_i1_to_i16_with_and(i
; GCN-LABEL: {{^}}v_sext_i1_to_i16_with_and:
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
; GCN-NEXT: buffer_store_short [[RESULT]]
-define void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%cmp0 = icmp eq i32 %a, %tid
%cmp1 = icmp eq i32 %b, %c
@@ -130,7 +130,7 @@ define void @v_sext_i1_to_i16_with_and(i
; GCN-DAG: buffer_store_dword [[VEXT3]]
; GCN: s_endpgm
-define void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) nounwind {
%cast = bitcast i32 %a to <4 x i8>
%ext = sext <4 x i8> %cast to <4 x i32>
%elt0 = extractelement <4 x i32> %ext, i32 0
@@ -162,7 +162,7 @@ define void @s_sext_v4i8_to_v4i32(i32 ad
; GCN: buffer_store_dword [[EXT1]]
; GCN: buffer_store_dword [[EXT2]]
; GCN: buffer_store_dword [[EXT3]]
-define void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32, i32 addrspace(1)* %in
%cast = bitcast i32 %a to <4 x i8>
%ext = sext <4 x i8> %cast to <4 x i32>
@@ -184,7 +184,7 @@ define void @v_sext_v4i8_to_v4i32(i32 ad
; GCN-DAG: s_sext_i32_i16
; GCN-DAG: s_sext_i32_i16
; GCN: s_endpgm
-define void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) nounwind {
+define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) nounwind {
%cast = bitcast i64 %a to <4 x i16>
%ext = sext <4 x i16> %cast to <4 x i32>
%elt0 = extractelement <4 x i32> %ext, i32 0
@@ -206,7 +206,7 @@ define void @s_sext_v4i16_to_v4i32(i32 a
; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: s_endpgm
-define void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%a = load i64, i64 addrspace(1)* %in
%cast = bitcast i64 %a to <4 x i16>
%ext = sext <4 x i16> %cast to <4 x i32>
Modified: llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.f64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.f64.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
; SI: v_cvt_f64_i32_e32
-define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
%result = sitofp i32 %in to double
store double %result, double addrspace(1)* %out
ret void
@@ -19,7 +19,7 @@ define void @sint_to_fp_i32_to_f64(doubl
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
; SI: s_endpgm
-define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = sitofp i1 %cmp to double
store double %fp, double addrspace(1)* %out, align 4
@@ -31,14 +31,14 @@ define void @sint_to_fp_i1_f64(double ad
; SI-NEXT: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
-define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
%fp = sitofp i1 %in to double
store double %fp, double addrspace(1)* %out, align 8
ret void
}
; SI-LABEL: @s_sint_to_fp_i64_to_f64
-define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
%result = sitofp i64 %in to double
store double %result, double addrspace(1)* %out
ret void
@@ -51,7 +51,7 @@ define void @s_sint_to_fp_i64_to_f64(dou
; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%val = load i64, i64 addrspace(1)* %gep, align 8
Modified: llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f16:
-define void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {
%result = sitofp i64 %in to half
store half %result, half addrspace(1)* %out
ret void
@@ -28,7 +28,7 @@ define void @s_sint_to_fp_i64_to_f16(hal
; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
; GCN: v_cvt_f16_f32_e32 [[SIGN_SEL_F16:v[0-9]+]], [[SIGN_SEL]]
; GCN: {{buffer|flat}}_store_short {{.*}}[[SIGN_SEL_F16]]
-define void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
@@ -39,7 +39,7 @@ define void @v_sint_to_fp_i64_to_f16(hal
}
; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f32:
-define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
%result = sitofp i64 %in to float
store float %result, float addrspace(1)* %out
ret void
@@ -62,7 +62,7 @@ define void @s_sint_to_fp_i64_to_f32(flo
; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]]
-define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -74,14 +74,14 @@ define void @v_sint_to_fp_i64_to_f32(flo
; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64_to_v2f32:
; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,
-define void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
%result = sitofp <2 x i64> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64_to_v4f32:
-define void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
@@ -93,14 +93,14 @@ define void @v_sint_to_fp_v4i64_to_v4f32
; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64_to_v2f16:
; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,
-define void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{
%result = sitofp <2 x i64> %in to <2 x half>
store <2 x half> %result, <2 x half> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64_to_v4f16:
-define void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 {
%result = sitofp i32 %in to float
store float %result, float addrspace(1)* %out
ret void
@@ -16,7 +16,7 @@ define void @s_sint_to_fp_i32_to_f32(flo
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
; R600: INT_TO_FLT
-define void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -32,7 +32,7 @@ define void @v_sint_to_fp_i32_to_f32(flo
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-define void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0{
%result = sitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
@@ -49,7 +49,7 @@ define void @s_sint_to_fp_v2i32(<2 x flo
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%value = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = sitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
@@ -66,7 +66,7 @@ define void @s_sint_to_fp_v4i32_to_v4f32
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
@@ -81,7 +81,7 @@ define void @v_sint_to_fp_v4i32(<4 x flo
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) #0 {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
store float %fp, float addrspace(1)* %out
@@ -92,7 +92,7 @@ define void @s_sint_to_fp_i1_f32(float a
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 {
%fp = sitofp i1 %in to float
store float %fp, float addrspace(1)* %out
ret void
@@ -105,7 +105,7 @@ define void @s_sint_to_fp_i1_f32_load(fl
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
-define void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/sitofp.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sitofp.f16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sitofp.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sitofp.f16.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @sitofp_i16_to_f16(
+define amdgpu_kernel void @sitofp_i16_to_f16(
half addrspace(1)* %r,
i16 addrspace(1)* %a) {
entry:
@@ -23,7 +23,7 @@ entry:
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_I16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
-define void @sitofp_i32_to_f16(
+define amdgpu_kernel void @sitofp_i32_to_f16(
half addrspace(1)* %r,
i32 addrspace(1)* %a) {
entry:
@@ -45,7 +45,7 @@ entry:
; GCN-DAG: v_or_b32_e32
; GCN: buffer_store_dword
; GCN: s_endpgm
-define void @sitofp_v2i16_to_v2f16(
+define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
<2 x half> addrspace(1)* %r,
<2 x i16> addrspace(1)* %a) {
entry:
@@ -65,7 +65,7 @@ entry:
; GCN-DAG: v_or_b32_e32
; GCN: buffer_store_dword
; GCN: s_endpgm
-define void @sitofp_v2i32_to_v2f16(
+define amdgpu_kernel void @sitofp_v2i32_to_v2f16(
<2 x half> addrspace(1)* %r,
<2 x i32> addrspace(1)* %a) {
entry:
Modified: llvm/trunk/test/CodeGen/AMDGPU/smed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smed3.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smed3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smed3.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i32:
; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
-define void @v_test_smed3_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -25,7 +25,7 @@ define void @v_test_smed3_r_i_i_i32(i32
; GCN-LABEL: {{^}}v_test_smed3_multi_use_r_i_i_i32:
; GCN: v_max_i32
; GCN: v_min_i32
-define void @v_test_smed3_multi_use_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_multi_use_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -45,7 +45,7 @@ define void @v_test_smed3_multi_use_r_i_
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_constant_order_i32:
; GCN: v_max_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
; GCN: v_min_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
-define void @v_test_smed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -64,7 +64,7 @@ define void @v_test_smed3_r_i_i_constant
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_sign_mismatch_i32:
; GCN: v_max_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
; GCN: v_min_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
-define void @v_test_smed3_r_i_i_sign_mismatch_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_sign_mismatch_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -83,7 +83,7 @@ define void @v_test_smed3_r_i_i_sign_mis
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i64:
; GCN: v_cmp_lt_i64
; GCN: v_cmp_gt_i64
-define void @v_test_smed3_r_i_i_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -102,7 +102,7 @@ define void @v_test_smed3_r_i_i_i64(i64
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i16:
; SICIVI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
-define void @v_test_smed3_r_i_i_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
@@ -174,7 +174,7 @@ define internal i8 @smax8(i8 %x, i8 %y)
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -186,7 +186,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_1:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -198,7 +198,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_2:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -210,7 +210,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_3:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_3(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_3(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -222,7 +222,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_4:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_4(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_4(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -234,7 +234,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_5:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_5(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_5(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -246,7 +246,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_6:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_6(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_6(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -258,7 +258,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_7:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_7(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_7(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -270,7 +270,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_8:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_8(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_8(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -282,7 +282,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_9:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_9(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_9(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -294,7 +294,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_10:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_10(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_10(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -306,7 +306,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_11:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_11(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_11(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -318,7 +318,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_12:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_12(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_12(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -330,7 +330,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_13:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_13(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_13(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -342,7 +342,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_14:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_14(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_14(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -354,7 +354,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_15:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_15(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_15(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -370,7 +370,7 @@ bb:
; GCN: s_sext_i32_i16
; GCN: s_sext_i32_i16
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i16_pat_0(i16 addrspace(1)* %arg, i16 %x, i16 %y, i16 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i16_pat_0(i16 addrspace(1)* %arg, i16 %x, i16 %y, i16 %z) #1 {
bb:
%tmp0 = call i16 @smin16(i16 %x, i16 %y)
%tmp1 = call i16 @smax16(i16 %x, i16 %y)
@@ -385,7 +385,7 @@ bb:
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i8_pat_0(i8 addrspace(1)* %arg, i8 %x, i8 %y, i8 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i8_pat_0(i8 addrspace(1)* %arg, i8 %x, i8 %y, i8 %z) #1 {
bb:
%tmp0 = call i8 @smin8(i8 %x, i8 %y)
%tmp1 = call i8 @smax8(i8 %x, i8 %y)
@@ -397,7 +397,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_0:
; GCN-NOT: v_med3_i32
-define void @s_test_smed3_i32_pat_0_multi_use_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -410,7 +410,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_1:
; GCN-NOT: v_med3_i32
-define void @s_test_smed3_i32_pat_0_multi_use_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -423,7 +423,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_2:
; GCN-NOT: v_med3_i32
-define void @s_test_smed3_i32_pat_0_multi_use_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -436,7 +436,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_result:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @s_test_smed3_i32_pat_0_multi_use_result(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_result(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -457,7 +457,7 @@ bb:
; VI: v_max_i16
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @v_test_smed3_i16_pat_0(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
+define amdgpu_kernel void @v_test_smed3_i16_pat_0(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; GCN: s_add_i32
; EG: MAX_INT
-define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
%neg = sub i32 0, %val
%cond = icmp sgt i32 %val, %neg
%res = select i1 %cond, i32 %val, i32 %neg
@@ -22,7 +22,7 @@ define void @s_abs_i32(i32 addrspace(1)*
; GCN: v_add_i32
; EG: MAX_INT
-define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%val = load i32, i32 addrspace(1)* %src, align 4
%neg = sub i32 0, %val
%cond = icmp sgt i32 %val, %neg
@@ -36,7 +36,7 @@ define void @v_abs_i32(i32 addrspace(1)*
; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[NEG]], [[SRC]]
; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]]
-define void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%val = load i32, i32 addrspace(1)* %src, align 4
%neg = sub i32 0, %val
%cond = icmp sgt i32 %val, %neg
@@ -54,7 +54,7 @@ define void @v_abs_i32_repeat_user(i32 a
; EG: MAX_INT
; EG: MAX_INT
-define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
+define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
%z0 = insertelement <2 x i32> undef, i32 0, i32 0
%z1 = insertelement <2 x i32> %z0, i32 0, i32 1
%t0 = insertelement <2 x i32> undef, i32 2, i32 0
@@ -79,7 +79,7 @@ define void @s_abs_v2i32(<2 x i32> addrs
; EG: MAX_INT
; EG: MAX_INT
-define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
+define amdgpu_kernel void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
%z0 = insertelement <2 x i32> undef, i32 0, i32 0
%z1 = insertelement <2 x i32> %z0, i32 0, i32 1
%t0 = insertelement <2 x i32> undef, i32 2, i32 0
@@ -109,7 +109,7 @@ define void @v_abs_v2i32(<2 x i32> addrs
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
-define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
+define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
%z0 = insertelement <4 x i32> undef, i32 0, i32 0
%z1 = insertelement <4 x i32> %z0, i32 0, i32 1
%z2 = insertelement <4 x i32> %z1, i32 0, i32 2
@@ -146,7 +146,7 @@ define void @s_abs_v4i32(<4 x i32> addrs
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
-define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
+define amdgpu_kernel void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
%z0 = insertelement <4 x i32> undef, i32 0, i32 0
%z1 = insertelement <4 x i32> %z0, i32 0, i32 1
%z2 = insertelement <4 x i32> %z1, i32 0, i32 2
@@ -170,7 +170,7 @@ define void @v_abs_v4i32(<4 x i32> addrs
; GCN-DAG: s_min_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: s_max_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
-define void @s_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %val0, i32 %val1) nounwind {
+define amdgpu_kernel void @s_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %val0, i32 %val1) nounwind {
%cond0 = icmp sgt i32 %val0, %val1
%sel0 = select i1 %cond0, i32 %val0, i32 %val1
%sel1 = select i1 %cond0, i32 %val1, i32 %val0
@@ -186,7 +186,7 @@ define void @s_min_max_i32(i32 addrspace
; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL1]], [[VAL0]]
; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL1]], [[VAL0]]
-define void @v_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
+define amdgpu_kernel void @v_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
%val0 = load volatile i32, i32 addrspace(1)* %ptr0
%val1 = load volatile i32, i32 addrspace(1)* %ptr1
@@ -208,7 +208,7 @@ define void @v_min_max_i32(i32 addrspace
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
-define void @s_min_max_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %val0, <4 x i32> %val1) nounwind {
+define amdgpu_kernel void @s_min_max_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %val0, <4 x i32> %val1) nounwind {
%cond0 = icmp sgt <4 x i32> %val0, %val1
%sel0 = select <4 x i1> %cond0, <4 x i32> %val0, <4 x i32> %val1
%sel1 = select <4 x i1> %cond0, <4 x i32> %val1, <4 x i32> %val0
@@ -223,7 +223,7 @@ define void @s_min_max_v4i32(<4 x i32> a
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
-define void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
+define amdgpu_kernel void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
%val0 = load volatile i32, i32 addrspace(1)* %ptr0
%val1 = load volatile i32, i32 addrspace(1)* %ptr1
Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll Tue Mar 21 16:39:51 2017
@@ -17,7 +17,7 @@
; CIVI: v_add_i32_e32
; CIVI: v_and_b32_e32 v{{[0-9]+}}, 0xffff,
; CIVI: v_or_b32_e32
-define void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
+define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
%neg = sub <2 x i16> zeroinitializer, %val
%cond = icmp sgt <2 x i16> %val, %neg
%res = select <2 x i1> %cond, <2 x i16> %val, <2 x i16> %neg
@@ -41,7 +41,7 @@ define void @s_abs_v2i16(<2 x i16> addrs
; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}
; VI-NOT: v_and_b32
; VI: v_or_b32_e32
-define void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
+define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %src, i32 %tid
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -59,7 +59,7 @@ define void @v_abs_v2i16(<2 x i16> addrs
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
-define void @s_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
+define amdgpu_kernel void @s_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
%t0 = insertelement <2 x i16> undef, i16 2, i16 0
@@ -77,7 +77,7 @@ define void @s_abs_v2i16_2(<2 x i16> add
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2
-define void @v_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
+define amdgpu_kernel void @v_abs_v2i16_2(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
%t0 = insertelement <2 x i16> undef, i16 2, i16 0
@@ -101,7 +101,7 @@ define void @v_abs_v2i16_2(<2 x i16> add
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, [[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], [[VAL1]], [[SUB1]]
; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2
-define void @s_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %val) #0 {
+define amdgpu_kernel void @s_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %val) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
%z2 = insertelement <4 x i16> %z1, i16 0, i16 2
@@ -128,7 +128,7 @@ define void @s_abs_v4i16(<4 x i16> addrs
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, v[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], v[[VAL1]], [[SUB1]]
; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2
-define void @v_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %src) #0 {
+define amdgpu_kernel void @v_abs_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %src) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
%z2 = insertelement <4 x i16> %z1, i16 0, i16 2
@@ -147,7 +147,7 @@ define void @v_abs_v4i16(<4 x i16> addrs
}
; GCN-LABEL: {{^}}s_min_max_v2i16:
-define void @s_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %val0, <2 x i16> %val1) #0 {
+define amdgpu_kernel void @s_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %val0, <2 x i16> %val1) #0 {
%cond0 = icmp sgt <2 x i16> %val0, %val1
%sel0 = select <2 x i1> %cond0, <2 x i16> %val0, <2 x i16> %val1
%sel1 = select <2 x i1> %cond0, <2 x i16> %val1, <2 x i16> %val0
@@ -158,7 +158,7 @@ define void @s_min_max_v2i16(<2 x i16> a
}
; GCN-LABEL: {{^}}v_min_max_v2i16:
-define void @v_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> addrspace(1)* %ptr0, <2 x i16> addrspace(1)* %ptr1) #0 {
+define amdgpu_kernel void @v_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> addrspace(1)* %ptr0, <2 x i16> addrspace(1)* %ptr1) #0 {
%val0 = load volatile <2 x i16>, <2 x i16> addrspace(1)* %ptr0
%val1 = load volatile <2 x i16>, <2 x i16> addrspace(1)* %ptr1
@@ -172,7 +172,7 @@ define void @v_min_max_v2i16(<2 x i16> a
}
; GCN-LABEL: {{^}}s_min_max_v4i32:
-define void @s_min_max_v4i32(<4 x i16> addrspace(1)* %out0, <4 x i16> addrspace(1)* %out1, <4 x i16> %val0, <4 x i16> %val1) #0 {
+define amdgpu_kernel void @s_min_max_v4i32(<4 x i16> addrspace(1)* %out0, <4 x i16> addrspace(1)* %out1, <4 x i16> %val0, <4 x i16> %val1) #0 {
%cond0 = icmp sgt <4 x i16> %val0, %val1
%sel0 = select <4 x i1> %cond0, <4 x i16> %val0, <4 x i16> %val1
%sel1 = select <4 x i1> %cond0, <4 x i16> %val1, <4 x i16> %val0
@@ -183,7 +183,7 @@ define void @s_min_max_v4i32(<4 x i16> a
}
; GCN-LABEL: {{^}}v_min_max_v2i16_user:
-define void @v_min_max_v2i16_user(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> addrspace(1)* %ptr0, <2 x i16> addrspace(1)* %ptr1) #0 {
+define amdgpu_kernel void @v_min_max_v2i16_user(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> addrspace(1)* %ptr0, <2 x i16> addrspace(1)* %ptr1) #0 {
%val0 = load volatile <2 x i16>, <2 x i16> addrspace(1)* %ptr0
%val1 = load volatile <2 x i16>, <2 x i16> addrspace(1)* %ptr1
@@ -200,7 +200,7 @@ define void @v_min_max_v2i16_user(<2 x i
; GCN-LABEL: {{^}}u_min_max_v2i16:
; GFX9: v_pk_max_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
; GFX9: v_pk_min_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define void @u_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %val0, <2 x i16> %val1) nounwind {
+define amdgpu_kernel void @u_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %val0, <2 x i16> %val1) nounwind {
%cond0 = icmp ugt <2 x i16> %val0, %val1
%sel0 = select <2 x i1> %cond0, <2 x i16> %val0, <2 x i16> %val1
%sel1 = select <2 x i1> %cond0, <2 x i16> %val1, <2 x i16> %val0
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd-vccz-bug.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
-define void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {
+define amdgpu_kernel void @vccz_workaround(i32 addrspace(2)* %in, i32 addrspace(1)* %out, float %cond) {
entry:
%cnd = fcmp oeq float 0.0, %cond
%sgpr = load volatile i32, i32 addrspace(2)* %in
@@ -32,7 +32,7 @@ endif:
; GCN: buffer_store_dword
; GCN: [[EXIT]]:
; GCN: s_endpgm
-define void @vccz_noworkaround(float addrspace(1)* %in, float addrspace(1)* %out) {
+define amdgpu_kernel void @vccz_noworkaround(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
%vgpr = load volatile float, float addrspace(1)* %in
%cnd = fcmp oeq float 0.0, %vgpr
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
-define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
%tmp1 = load i32, i32 addrspace(2)* %tmp
@@ -18,7 +18,7 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
%tmp1 = load i32, i32 addrspace(2)* %tmp
@@ -33,7 +33,7 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
-define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
%tmp1 = load i32, i32 addrspace(2)* %tmp
@@ -48,7 +48,7 @@ entry:
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
-define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
%tmp1 = load i32, i32 addrspace(2)* %tmp
@@ -62,7 +62,7 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
%tmp1 = load i32, i32 addrspace(2)* %tmp
@@ -76,7 +76,7 @@ entry:
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+define amdgpu_kernel void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
%tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
%tmp1 = load i32, i32 addrspace(2)* %tmp
Modified: llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sopk-compares.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.groupstaticsize
; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 4{{$}}
-define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 4
br i1 %cmp0, label %endif, label %if
@@ -25,7 +25,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}}
-define void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 32767
br i1 %cmp0, label %endif, label %if
@@ -41,7 +41,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
-define void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 32768
br i1 %cmp0, label %endif, label %if
@@ -57,7 +57,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1:
; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}}
-define void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ne i32 %cond, 32768
br i1 %cmp0, label %endif, label %if
@@ -73,7 +73,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}}
-define void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, -32768
br i1 %cmp0, label %endif, label %if
@@ -89,7 +89,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
-define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, -32769
br i1 %cmp0, label %endif, label %if
@@ -105,7 +105,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
-define void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 65535
br i1 %cmp0, label %endif, label %if
@@ -121,7 +121,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
-define void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 65535
br i1 %cmp0, label %endif, label %if
@@ -137,7 +137,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0x10000{{$}}
-define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 65536
br i1 %cmp0, label %endif, label %if
@@ -154,7 +154,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}}
-define void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -170,7 +170,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ne_i32:
; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}}
-define void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ne i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -186,7 +186,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sgt_i32:
; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}}
-define void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp sgt i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -202,7 +202,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max:
; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}}
-define void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp sgt i32 %cond, 32767
br i1 %cmp0, label %endif, label %if
@@ -218,7 +218,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1:
; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}}
-define void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp sgt i32 %cond, 32768
br i1 %cmp0, label %endif, label %if
@@ -234,7 +234,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sge_i32:
; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sge i32 %cond, %size
@@ -251,7 +251,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_slt_i32:
; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}}
-define void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp slt i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -267,7 +267,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sle_i32:
; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sle i32 %cond, %size
@@ -284,7 +284,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ugt_i32:
; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ugt i32 %cond, %size
@@ -301,7 +301,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_uge_i32:
; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp uge i32 %cond, %size
@@ -318,7 +318,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32:
; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}}
-define void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ult i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -334,7 +334,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16:
; GCN: s_cmp_lt_u32 s2, 0xffff8000
-define void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ult i32 %cond, -32768
br i1 %cmp0, label %endif, label %if
@@ -350,7 +350,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1:
; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
-define void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ult i32 %cond, -32769
br i1 %cmp0, label %endif, label %if
@@ -366,7 +366,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ule_i32:
; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ule i32 %cond, %size
@@ -383,7 +383,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_eq_i32:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp eq i32 %size, %cond
@@ -400,7 +400,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ne_i32:
; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ne i32 %size, %cond
@@ -417,7 +417,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_sgt_i32:
; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sgt i32 %size, %cond
@@ -434,7 +434,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_sge_i32:
; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sge i32 %size, %cond
@@ -451,7 +451,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_slt_i32:
; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp slt i32 %size, %cond
@@ -468,7 +468,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_sle_i32:
; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sle i32 %size, %cond
@@ -485,7 +485,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ugt_i32:
; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ugt i32 %size, %cond
@@ -502,7 +502,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_uge_i32:
; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp uge i32 %size, %cond
@@ -519,7 +519,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ult_i32:
; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ult i32 %size, %cond
@@ -536,7 +536,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ule_i32:
; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
-define void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ule i32 %size, %cond
@@ -553,7 +553,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32_non_u16:
; GCN: s_cmp_lt_u32 s2, 0xfffff7ff
-define void @br_scc_ult_i32_non_u16(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32_non_u16(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%not.size = xor i32 %size, -1
@@ -573,7 +573,7 @@ endif:
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
; SI: v_cmp_eq_u64_e64
-define void @br_scc_eq_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i64 %cond, 4
br i1 %cmp0, label %endif, label %if
@@ -593,7 +593,7 @@ endif:
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; SI: v_cmp_eq_u64_e32
-define void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp eq i64 %cond, 1234
br i1 %cmp0, label %endif, label %if
@@ -611,7 +611,7 @@ endif:
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
; SI: v_cmp_ne_u64_e64
-define void @br_scc_ne_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ne i64 %cond, 4
br i1 %cmp0, label %endif, label %if
@@ -631,7 +631,7 @@ endif:
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
; SI: v_cmp_ne_u64_e32
-define void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
entry:
%cmp0 = icmp ne i64 %cond, 1234
br i1 %cmp0, label %endif, label %if
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll Tue Mar 21 16:39:51 2017
@@ -4,7 +4,7 @@
; allocate scratch registers correctly. Check that this test compiles without
; error.
; TONGA-LABEL: test
-define void @test(<256 x i32> addrspace(1)* %out, <256 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @test(<256 x i32> addrspace(1)* %out, <256 x i32> addrspace(1)* %in) {
entry:
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-cfg-position.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-cfg-position.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-cfg-position.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-cfg-position.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@
; CHECK-NEXT: s_or_b64 exec
; CHECK: buffer_
-define void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
bb:
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp14 = load i32, i32 addrspace(1)* %arg, align 4
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll Tue Mar 21 16:39:51 2017
@@ -43,7 +43,7 @@
; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
-define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
%cmp0 = icmp eq i32 %cond, 0
@@ -136,7 +136,7 @@ endif:
; GCN-NOT: v_readlane_b32 m0
; GCN-NOT: s_buffer_store_dword m0
; GCN-NOT: s_buffer_load_dword m0
-define void @m0_unavailable_spill(i32 %m0.arg) #0 {
+define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 {
main_body:
%m0 = call i32 asm sideeffect "; def $0, 1", "={M0}"() #0
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg)
@@ -189,7 +189,7 @@ endif:
; TOSMEM: s_dcache_wb
; TOSMEM: s_endpgm
-define void @restore_m0_lds(i32 %arg) {
+define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
%sval = load volatile i64, i64 addrspace(2)* undef
%cmp = icmp eq i32 %arg, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-scavenge-offset.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-scavenge-offset.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-scavenge-offset.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; Just test that it compiles successfully.
; CHECK-LABEL: test
-define void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) {
entry:
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-scalar-i64-add.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-scalar-i64-add.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-scalar-i64-add.ll Tue Mar 21 16:39:51 2017
@@ -10,7 +10,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, v{{[0-9]+}}
; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
-define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
+define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
%v.val = load volatile i32, i32 addrspace(1)* %in
%vec.0 = insertelement <2 x i32> undef, i32 %s.val, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
@@ -23,7 +23,7 @@ define void @imp_def_vcc_split_i64_add_0
; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:
; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f
; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0
-define void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
+define amdgpu_kernel void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
%vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
%bc = bitcast <2 x i32> %vec.1 to i64
@@ -35,7 +35,7 @@ define void @s_imp_def_vcc_split_i64_add
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
; SI: v_add_i32
; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+define amdgpu_kernel void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
%v.val = load volatile i32, i32 addrspace(1)* %in
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
@@ -48,7 +48,7 @@ define void @imp_def_vcc_split_i64_add_1
; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:
; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}
-define void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
+define amdgpu_kernel void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
%bc = bitcast <2 x i32> %vec.1 to i64
@@ -61,7 +61,7 @@ define void @s_imp_def_vcc_split_i64_add
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
-define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+define amdgpu_kernel void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%load = load i32, i32 addrspace(1)* %gep
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll Tue Mar 21 16:39:51 2017
@@ -29,7 +29,7 @@
; GCN-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24
; GCN: s_endpgm
-define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
+define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
entry:
%tmp = tail call i32 @llvm.r600.read.local.size.y()
%tmp1 = tail call i32 @llvm.r600.read.local.size.z()
Modified: llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir Tue Mar 21 16:39:51 2017
@@ -1,7 +1,7 @@
# RUN: llc -o - %s -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
--- |
- define void @func0() #0 { ret void }
- define void @func1() #0 { ret void }
+ define amdgpu_kernel void @func0() #0 { ret void }
+ define amdgpu_kernel void @func1() #0 { ret void }
attributes #0 = { "amdgpu-num-sgpr"="12" }
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/sra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sra.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sra.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sra.ll Tue Mar 21 16:39:51 2017
@@ -13,7 +13,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
; EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
@@ -37,7 +37,7 @@ define void @ashr_v2i32(<2 x i32> addrsp
; EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
@@ -51,7 +51,7 @@ define void @ashr_v4i32(<4 x i32> addrsp
; global load we end up with the vector instructions rather than scalar.
; VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
%a = load <2 x i16>, <2 x i16> addrspace(1)* %in
%b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
@@ -67,7 +67,7 @@ define void @ashr_v2i16(<2 x i16> addrsp
; VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
%a = load <4 x i16>, <4 x i16> addrspace(1)* %in
%b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
@@ -80,7 +80,7 @@ define void @ashr_v4i16(<4 x i16> addrsp
; GCN: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
; EG: ASHR
-define void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:
%in.ext = sext i32 %in to i64
%ashr = ashr i64 %in.ext, 8
@@ -105,7 +105,7 @@ entry:
; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
%b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
%a = load i64, i64 addrspace(1)* %in
@@ -143,7 +143,7 @@ entry:
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
%a = load <2 x i64>, <2 x i64> addrspace(1)* %in
%b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
@@ -156,7 +156,7 @@ define void @ashr_v2i64(<2 x i64> addrsp
; XFUNC-LABEL: {{^}}s_ashr_v2i64:
; XGCN: s_ashr_i64 {{s\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], s[0-9]+}}
; XGCN: s_ashr_i64 {{s\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], s[0-9]+}}
-; define void @s_ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in, <2 x i64> %a, <2 x i64> %b) {
+; define amdgpu_kernel void @s_ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in, <2 x i64> %a, <2 x i64> %b) {
; %result = ashr <2 x i64> %a, %b
; store <2 x i64> %result, <2 x i64> addrspace(1)* %out
; ret void
@@ -221,7 +221,7 @@ define void @ashr_v2i64(<2 x i64> addrsp
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
%a = load <4 x i64>, <4 x i64> addrspace(1)* %in
%b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
@@ -235,7 +235,7 @@ define void @ashr_v4i64(<4 x i64> addrsp
; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31
; GCN: s_add_u32 s{{[0-9]+}}, s[[HI]], s{{[0-9]+}}
; GCN: s_addc_u32 s{{[0-9]+}}, s[[SHIFT]], s{{[0-9]+}}
-define void @s_ashr_32_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%result = ashr i64 %a, 32
%add = add i64 %result, %b
store i64 %add, i64 addrspace(1)* %out
@@ -247,7 +247,7 @@ define void @s_ashr_32_i64(i64 addrspace
; VI: flat_load_dword v[[HI:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
-define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -262,7 +262,7 @@ define void @v_ashr_32_i64(i64 addrspace
; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31
; GCN: s_add_u32 {{s[0-9]+}}, s[[SHIFT]], {{s[0-9]+}}
; GCN: s_addc_u32 {{s[0-9]+}}, s[[SHIFT]], {{s[0-9]+}}
-define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%result = ashr i64 %a, 63
%add = add i64 %result, %b
store i64 %add, i64 addrspace(1)* %out
@@ -275,7 +275,7 @@ define void @s_ashr_63_i64(i64 addrspace
; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}}
-define void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/srem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/srem.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/srem.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/srem.ll Tue Mar 21 16:39:51 2017
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s
-define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%num = load i32, i32 addrspace(1) * %in
%den = load i32, i32 addrspace(1) * %den_ptr
@@ -11,7 +11,7 @@ define void @srem_i32(i32 addrspace(1)*
ret void
}
-define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32, i32 addrspace(1) * %in
%result = srem i32 %num, 4
store i32 %result, i32 addrspace(1)* %out
@@ -24,14 +24,14 @@ define void @srem_i32_4(i32 addrspace(1)
; SI: v_mul_lo_i32
; SI: v_sub_i32
; SI: s_endpgm
-define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32, i32 addrspace(1) * %in
%result = srem i32 %num, 7
store i32 %result, i32 addrspace(1)* %out
ret void
}
-define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
@@ -40,14 +40,14 @@ define void @srem_v2i32(<2 x i32> addrsp
ret void
}
-define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%result = srem <2 x i32> %num, <i32 4, i32 4>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
@@ -56,14 +56,14 @@ define void @srem_v4i32(<4 x i32> addrsp
ret void
}
-define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
-define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
%num = load i64, i64 addrspace(1) * %in
%den = load i64, i64 addrspace(1) * %den_ptr
@@ -72,14 +72,14 @@ define void @srem_i64(i64 addrspace(1)*
ret void
}
-define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%num = load i64, i64 addrspace(1) * %in
%result = srem i64 %num, 4
store i64 %result, i64 addrspace(1)* %out
ret void
}
-define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
%num = load <2 x i64>, <2 x i64> addrspace(1) * %in
%den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
@@ -88,14 +88,14 @@ define void @srem_v2i64(<2 x i64> addrsp
ret void
}
-define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%num = load <2 x i64>, <2 x i64> addrspace(1) * %in
%result = srem <2 x i64> %num, <i64 4, i64 4>
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
}
-define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
%num = load <4 x i64>, <4 x i64> addrspace(1) * %in
%den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
@@ -104,7 +104,7 @@ define void @srem_v4i64(<4 x i64> addrsp
ret void
}
-define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%num = load <4 x i64>, <4 x i64> addrspace(1) * %in
%result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/srl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/srl.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/srl.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/srl.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@@ -26,7 +26,7 @@ define void @lshr_i32(i32 addrspace(1)*
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
@@ -50,7 +50,7 @@ define void @lshr_v2i32(<2 x i32> addrsp
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
@@ -74,7 +74,7 @@ define void @lshr_v4i32(<4 x i32> addrsp
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
-define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
%a = load i64, i64 addrspace(1)* %in
%b = load i64, i64 addrspace(1)* %b_ptr
@@ -112,7 +112,7 @@ define void @lshr_i64(i64 addrspace(1)*
; EG-DAG: CNDE_INT {{.*}}, 0.0
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
%a = load <2 x i64>, <2 x i64> addrspace(1)* %in
%b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
@@ -178,7 +178,7 @@ define void @lshr_v2i64(<2 x i64> addrsp
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
%a = load <4 x i64>, <4 x i64> addrspace(1)* %in
%b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
@@ -193,7 +193,7 @@ define void @lshr_v4i64(<4 x i64> addrsp
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[HI_A]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) {
%result = lshr i64 %a, 32
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -203,7 +203,7 @@ define void @s_lshr_32_i64(i64 addrspace
; GCN-DAG: buffer_load_dword v[[HI_A:[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}}
-define void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
Modified: llvm/trunk/test/CodeGen/AMDGPU/ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ssubo.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ssubo.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@ declare { i32, i1 } @llvm.ssub.with.over
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
; FUNC-LABEL: {{^}}ssubo_i64_zext:
-define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
%carry = extractvalue { i64, i1 } %ssub, 1
@@ -17,7 +17,7 @@ define void @ssubo_i64_zext(i64 addrspac
}
; FUNC-LABEL: {{^}}s_ssubo_i32:
-define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %ssub, 0
%carry = extractvalue { i32, i1 } %ssub, 1
@@ -27,7 +27,7 @@ define void @s_ssubo_i32(i32 addrspace(1
}
; FUNC-LABEL: {{^}}v_ssubo_i32:
-define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -41,7 +41,7 @@ define void @v_ssubo_i32(i32 addrspace(1
; FUNC-LABEL: {{^}}s_ssubo_i64:
; SI: s_sub_u32
; SI: s_subb_u32
-define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
%carry = extractvalue { i64, i1 } %ssub, 1
@@ -53,7 +53,7 @@ define void @s_ssubo_i64(i64 addrspace(1
; FUNC-LABEL: {{^}}v_ssubo_i64:
; SI: v_sub_i32_e32
; SI: v_subb_u32_e32
-define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64, i64 addrspace(1)* %aptr, align 4
%b = load i64, i64 addrspace(1)* %bptr, align 4
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-barrier.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-barrier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-barrier.ll Tue Mar 21 16:39:51 2017
@@ -12,7 +12,7 @@
; CHECK: s_barrier
; CHECK: s_endpgm
; Function Attrs: nounwind
-define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
+define amdgpu_kernel void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
bb:
%tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
%tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-global.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-global.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-global.ll Tue Mar 21 16:39:51 2017
@@ -11,7 +11,7 @@
; CM-NOT: MEM_RAT MSKOR
; GCN: buffer_store_byte
-define void @store_i1(i1 addrspace(1)* %out) {
+define amdgpu_kernel void @store_i1(i1 addrspace(1)* %out) {
entry:
store i1 true, i1 addrspace(1)* %out
ret void
@@ -42,7 +42,7 @@ entry:
; GCN: buffer_store_byte
-define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
+define amdgpu_kernel void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
store i8 %in, i8 addrspace(1)* %out
ret void
@@ -75,7 +75,7 @@ entry:
; EG: MOV * T[[RW_GPR]].Z, 0.0
; GCN: buffer_store_short
-define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
+define amdgpu_kernel void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
store i16 %in, i16 addrspace(1)* %out
ret void
@@ -88,7 +88,7 @@ entry:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
-define void @store_i24(i24 addrspace(1)* %out, i24 %in) {
+define amdgpu_kernel void @store_i24(i24 addrspace(1)* %out, i24 %in) {
entry:
store i24 %in, i24 addrspace(1)* %out
ret void
@@ -104,7 +104,7 @@ entry:
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT
-define void @store_i25(i25 addrspace(1)* %out, i25 %in) {
+define amdgpu_kernel void @store_i25(i25 addrspace(1)* %out, i25 %in) {
entry:
store i25 %in, i25 addrspace(1)* %out
ret void
@@ -119,7 +119,7 @@ entry:
; CM-NOT: MEM_RAT MSKOR
; GCN: buffer_store_short
-define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
store <2 x i8> %0, <2 x i8> addrspace(1)* %out
@@ -136,7 +136,7 @@ entry:
; CM-NOT: MEM_RAT MSKOR
; SI: buffer_store_byte
-define void @store_v2i8_unaligned(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
store <2 x i8> %0, <2 x i8> addrspace(1)* %out, align 1
@@ -150,7 +150,7 @@ entry:
; CM: MEM_RAT_CACHELESS STORE_DWORD
; GCN: buffer_store_dword
-define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
store <2 x i16> %0, <2 x i16> addrspace(1)* %out
@@ -170,7 +170,7 @@ entry:
; SI: buffer_store_short
; SI: buffer_store_short
-define void @store_v2i16_unaligned(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
store <2 x i16> %0, <2 x i16> addrspace(1)* %out, align 2
@@ -183,7 +183,7 @@ entry:
; CM: MEM_RAT_CACHELESS STORE_DWORD
; GCN: buffer_store_dword
-define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(1)* %out
@@ -210,7 +210,7 @@ entry:
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define void @store_v4i8_unaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 1
@@ -231,7 +231,7 @@ entry:
; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
-define void @store_v4i8_halfaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 2
@@ -246,7 +246,7 @@ entry:
; GCN: buffer_store_dword
-define void @store_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @store_f32(float addrspace(1)* %out, float %in) {
store float %in, float addrspace(1)* %out
ret void
}
@@ -257,7 +257,7 @@ define void @store_f32(float addrspace(1
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}
; GCN: buffer_store_dwordx2
-define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
store <4 x i16> %0, <4 x i16> addrspace(1)* %out
@@ -272,7 +272,7 @@ entry:
; GCN: buffer_store_dwordx2
-define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
+define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
%0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
%1 = insertelement <2 x float> %0, float %b, i32 1
@@ -286,7 +286,7 @@ entry:
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XY}}, {{T[0-9]+\.[XYZW]}},
-define void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
+define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
ret void
}
@@ -299,7 +299,7 @@ define void @store_v3i32(<3 x i32> addrs
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; GCN: buffer_store_dwordx4
-define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
ret void
@@ -313,7 +313,7 @@ entry:
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SI: buffer_store_dwordx4
-define void @store_v4i32_unaligned(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
ret void
@@ -328,7 +328,7 @@ entry:
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; GCN: buffer_store_dwordx4
-define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%1 = load <4 x float>, <4 x float> addrspace(1) * %in
store <4 x float> %1, <4 x float> addrspace(1)* %out
ret void
@@ -340,7 +340,7 @@ define void @store_v4f32(<4 x float> add
; CM: MEM_RAT MSKOR
; GCN: buffer_store_byte
-define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
store i8 %0, i8 addrspace(1)* %out
@@ -350,7 +350,7 @@ entry:
; FUNC-LABEL: {{^}}store_i64_i16:
; EG: MEM_RAT MSKOR
; GCN: buffer_store_short
-define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
store i16 %0, i16 addrspace(1)* %out
@@ -369,7 +369,7 @@ entry:
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
; GCN: buffer_store_dwordx2
-define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(2)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
@@ -388,7 +388,7 @@ entry:
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
; GCN: buffer_store_dwordx4
-define void @i128-const-store(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @i128-const-store(i32 addrspace(1)* %out) {
entry:
store i32 1, i32 addrspace(1)* %out, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-local.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-local.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-local.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-local.ll Tue Mar 21 16:39:51 2017
@@ -9,7 +9,7 @@
; CM: LDS_BYTE_WRITE
; GCN: ds_write_b8
-define void @store_local_i1(i1 addrspace(3)* %out) {
+define amdgpu_kernel void @store_local_i1(i1 addrspace(3)* %out) {
entry:
store i1 true, i1 addrspace(3)* %out
ret void
@@ -21,7 +21,7 @@ entry:
; CM: LDS_BYTE_WRITE
; GCN: ds_write_b8
-define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
+define amdgpu_kernel void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
store i8 %in, i8 addrspace(3)* %out
ret void
}
@@ -32,7 +32,7 @@ define void @store_local_i8(i8 addrspace
; CM: LDS_SHORT_WRITE
; GCN: ds_write_b16
-define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
+define amdgpu_kernel void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
store i16 %in, i16 addrspace(3)* %out
ret void
}
@@ -43,7 +43,7 @@ define void @store_local_i16(i16 addrspa
; CM: LDS_WRITE
; GCN: ds_write_b32
-define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
+define amdgpu_kernel void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(3)* %out
ret void
@@ -55,7 +55,7 @@ entry:
; CM: LDS_WRITE
; GCN: ds_write_b32
-define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
+define amdgpu_kernel void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(3)* %out
ret void
@@ -78,7 +78,7 @@ entry:
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
-define void @store_local_v4i8_unaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
+define amdgpu_kernel void @store_local_v4i8_unaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 1
ret void
@@ -95,7 +95,7 @@ entry:
; GCN: ds_write_b16
; GCN: ds_write_b16
-define void @store_local_v4i8_halfaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
+define amdgpu_kernel void @store_local_v4i8_halfaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 2
ret void
@@ -111,7 +111,7 @@ entry:
; CM-NOT: LDS_WRITE
; GCN: ds_write_b64
-define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
store <2 x i32> %in, <2 x i32> addrspace(3)* %out
ret void
@@ -129,7 +129,7 @@ entry:
; CM: LDS_WRITE
; GCN: ds_write2_b64
-define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out
ret void
@@ -148,7 +148,7 @@ entry:
; GCN: ds_write2_b32
; GCN: ds_write2_b32
-define void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
ret void
@@ -157,7 +157,7 @@ entry:
; FUNC-LABEL: {{^}}store_local_i64_i8:
; EG: LDS_BYTE_WRITE
; GCN: ds_write_b8
-define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
+define amdgpu_kernel void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
store i8 %0, i8 addrspace(3)* %out
@@ -167,7 +167,7 @@ entry:
; FUNC-LABEL: {{^}}store_local_i64_i16:
; EG: LDS_SHORT_WRITE
; GCN: ds_write_b16
-define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
+define amdgpu_kernel void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
store i16 %0, i16 addrspace(3)* %out
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-private.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-private.ll Tue Mar 21 16:39:51 2017
@@ -15,7 +15,7 @@
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define void @store_i1(i1 addrspace(0)* %out) {
+define amdgpu_kernel void @store_i1(i1 addrspace(0)* %out) {
entry:
store i1 true, i1 addrspace(0)* %out
ret void
@@ -44,7 +44,7 @@ entry:
; SI: buffer_store_byte
-define void @store_i8(i8 addrspace(0)* %out, i8 %in) {
+define amdgpu_kernel void @store_i8(i8 addrspace(0)* %out, i8 %in) {
entry:
store i8 %in, i8 addrspace(0)* %out
ret void
@@ -72,7 +72,7 @@ entry:
; EG: MOV * T(0 + AR.x).X+, [[RES]]
; SI: buffer_store_short
-define void @store_i16(i16 addrspace(0)* %out, i16 %in) {
+define amdgpu_kernel void @store_i16(i16 addrspace(0)* %out, i16 %in) {
entry:
store i16 %in, i16 addrspace(0)* %out
ret void
@@ -102,7 +102,7 @@ entry:
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
-define void @store_i24(i24 addrspace(0)* %out, i24 %in) {
+define amdgpu_kernel void @store_i24(i24 addrspace(0)* %out, i24 %in) {
entry:
store i24 %in, i24 addrspace(0)* %out
ret void
@@ -120,7 +120,7 @@ entry:
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT
-define void @store_i25(i25 addrspace(0)* %out, i25 %in) {
+define amdgpu_kernel void @store_i25(i25 addrspace(0)* %out, i25 %in) {
entry:
store i25 %in, i25 addrspace(0)* %out
ret void
@@ -141,7 +141,7 @@ entry:
; CM-NOT: MOVA_INT
; SI: buffer_store_short
-define void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
store <2 x i8> %0, <2 x i8> addrspace(0)* %out
@@ -172,7 +172,7 @@ entry:
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
store <2 x i8> %0, <2 x i8> addrspace(0)* %out, align 1
@@ -191,7 +191,7 @@ entry:
; CM-NOT: MOVA_INT
; SI: buffer_store_dword
-define void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
store <2 x i16> %0, <2 x i16> addrspace(0)* %out
@@ -223,7 +223,7 @@ entry:
; SI: buffer_store_short
; SI: buffer_store_short
-define void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
store <2 x i16> %0, <2 x i16> addrspace(0)* %out, align 2
@@ -240,7 +240,7 @@ entry:
; CM-NOT: MOVA_INT
; SI: buffer_store_dword
-define void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(0)* %out
@@ -299,7 +299,7 @@ entry:
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 1
@@ -410,7 +410,7 @@ entry:
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
-define void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) {
+define amdgpu_kernel void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) {
entry:
%0 = trunc <8 x i32> %in to <8 x i8>
store <8 x i8> %0, <8 x i8> addrspace(0)* %out, align 1
@@ -443,7 +443,7 @@ entry:
; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
-define void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 2
@@ -460,7 +460,7 @@ entry:
; SI: buffer_store_dword
-define void @store_f32(float addrspace(0)* %out, float %in) {
+define amdgpu_kernel void @store_f32(float addrspace(0)* %out, float %in) {
store float %in, float addrspace(0)* %out
ret void
}
@@ -480,7 +480,7 @@ define void @store_f32(float addrspace(0
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
store <4 x i16> %0, <4 x i16> addrspace(0)* %out
@@ -504,7 +504,7 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) {
+define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) {
entry:
%0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
%1 = insertelement <2 x float> %0, float %b, i32 1
@@ -533,7 +533,7 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind {
+define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind {
store <3 x i32> %a, <3 x i32> addrspace(0)* %out, align 16
ret void
}
@@ -563,7 +563,7 @@ define void @store_v3i32(<3 x i32> addrs
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(0)* %out
ret void
@@ -594,7 +594,7 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(0)* %out, align 4
ret void
@@ -626,7 +626,7 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) {
+define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) {
%1 = load <4 x float>, <4 x float> addrspace(0) * %in
store <4 x float> %1, <4 x float> addrspace(0)* %out
ret void
@@ -644,7 +644,7 @@ define void @store_v4f32(<4 x float> add
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_byte
-define void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
store i8 %0, i8 addrspace(0)* %out
@@ -663,7 +663,7 @@ entry:
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; SI: buffer_store_short
-define void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) {
+define amdgpu_kernel void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
store i16 %0, i16 addrspace(0)* %out
@@ -689,7 +689,7 @@ entry:
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
%0 = load i32, i32 addrspace(2)* %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
@@ -727,7 +727,7 @@ entry:
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
-define void @i128-const-store(i32 addrspace(0)* %out) {
+define amdgpu_kernel void @i128-const-store(i32 addrspace(0)* %out) {
entry:
store i32 1, i32 addrspace(0)* %out, align 4
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-v3i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-v3i64.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-v3i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-v3i64.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}global_store_v3i64:
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
ret void
}
@@ -40,7 +40,7 @@ define void @global_store_v3i64(<3 x i64
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
-define void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
ret void
}
@@ -48,7 +48,7 @@ define void @global_store_v3i64_unaligne
; GCN-LABEL: {{^}}local_store_v3i64:
; GCN: ds_write2_b64
; GCN: ds_write_b64
-define void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
ret void
}
@@ -83,7 +83,7 @@ define void @local_store_v3i64(<3 x i64>
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
-define void @local_store_v3i64_unaligned(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @local_store_v3i64_unaligned(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 1
ret void
}
@@ -91,7 +91,7 @@ define void @local_store_v3i64_unaligned
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i32:
; GCN-DAG: buffer_store_dwordx2
; GCN-DAG: buffer_store_dword v
-define void @global_truncstore_v3i64_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @global_truncstore_v3i64_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i64> %x) {
%trunc = trunc <3 x i64> %x to <3 x i32>
store <3 x i32> %trunc, <3 x i32> addrspace(1)* %out
ret void
@@ -100,7 +100,7 @@ define void @global_truncstore_v3i64_to_
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i16:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_dword v
-define void @global_truncstore_v3i64_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @global_truncstore_v3i64_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i64> %x) {
%trunc = trunc <3 x i64> %x to <3 x i16>
store <3 x i16> %trunc, <3 x i16> addrspace(1)* %out
ret void
@@ -110,7 +110,7 @@ define void @global_truncstore_v3i64_to_
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i8:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_byte v
-define void @global_truncstore_v3i64_to_v3i8(<3 x i8> addrspace(1)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @global_truncstore_v3i64_to_v3i8(<3 x i8> addrspace(1)* %out, <3 x i64> %x) {
%trunc = trunc <3 x i64> %x to <3 x i8>
store <3 x i8> %trunc, <3 x i8> addrspace(1)* %out
ret void
@@ -120,7 +120,7 @@ define void @global_truncstore_v3i64_to_
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
-define void @global_truncstore_v3i64_to_v3i1(<3 x i1> addrspace(1)* %out, <3 x i64> %x) {
+define amdgpu_kernel void @global_truncstore_v3i64_to_v3i1(<3 x i1> addrspace(1)* %out, <3 x i64> %x) {
%trunc = trunc <3 x i64> %x to <3 x i1>
store <3 x i1> %trunc, <3 x i1> addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-vector-ptrs.ll Tue Mar 21 16:39:51 2017
@@ -5,7 +5,7 @@
; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting
; scratch loads and stores.
; CHECK-LABEL: {{^}}store_vector_ptrs:
-define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
+define amdgpu_kernel void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
%p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
store <4 x i32*> %p, <4 x i32*>* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/store_typed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store_typed.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store_typed.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store_typed.ll Tue Mar 21 16:39:51 2017
@@ -6,7 +6,7 @@
; EG: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}, 1
; CM: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}
-define void @store_typed_rat0(<4 x i32> %data, <4 x i32> %index) {
+define amdgpu_kernel void @store_typed_rat0(<4 x i32> %data, <4 x i32> %index) {
call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 0)
ret void
}
@@ -16,7 +16,7 @@ define void @store_typed_rat0(<4 x i32>
; EG: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}, 1
; CM: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}
-define void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index) {
+define amdgpu_kernel void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index) {
call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 11)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/structurize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/structurize.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/structurize.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/structurize.ll Tue Mar 21 16:39:51 2017
@@ -45,7 +45,7 @@
; CHECK: CF_END
-define void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+define amdgpu_kernel void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = icmp ne i32 %a, 0
br i1 %0, label %diamond_head, label %branch_from
Modified: llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll Tue Mar 21 16:39:51 2017
@@ -19,7 +19,7 @@
; CHECK-LABEL: {{^}}if_inside_loop:
; CHECK: LOOP_START_DX10
; CHECK: END_LOOP
-define void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
br label %for.body
Modified: llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sub.i16.ll Tue Mar 21 16:39:51 2017
@@ -7,7 +7,7 @@
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -24,7 +24,7 @@ define void @v_test_sub_i16(i16 addrspac
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xffffff85, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -39,7 +39,7 @@ define void @v_test_sub_i16_constant(i16
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x34d, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define void @v_test_sub_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -54,7 +54,7 @@ define void @v_test_sub_i16_neg_constant
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], 63, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define void @v_test_sub_i16_inline_63(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_inline_63(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -70,7 +70,7 @@ define void @v_test_sub_i16_inline_63(i1
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: buffer_store_dword [[ADD]]
-define void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -90,7 +90,7 @@ define void @v_test_sub_i16_zext_to_i32(
; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -110,7 +110,7 @@ define void @v_test_sub_i16_zext_to_i64(
; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
-define void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -131,7 +131,7 @@ define void @v_test_sub_i16_sext_to_i32(
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
@@ -149,7 +149,7 @@ define void @v_test_sub_i16_sext_to_i64(
; GCN-LABEL: {{^}}v_test_sub_i16_constant_commute:
; VI: v_subrev_u16_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
; CI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 0x800, v{{[0-9]+}}
-define void @v_test_sub_i16_constant_commute(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_constant_commute(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%size.trunc = trunc i32 %size to i16
call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
Modified: llvm/trunk/test/CodeGen/AMDGPU/sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sub.ll?rev=298444&r1=298443&r2=298444&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sub.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sub.ll Tue Mar 21 16:39:51 2017
@@ -8,7 +8,7 @@ declare i32 @llvm.r600.read.tidig.x() re
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
-define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
@@ -25,7 +25,7 @@ define void @test_sub_i32(i32 addrspace(
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
-define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1) * %in
%b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
@@ -45,7 +45,7 @@ define void @test_sub_v2i32(<2 x i32> ad
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
-define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
@@ -55,7 +55,7 @@ define void @test_sub_v4i32(<4 x i32> ad
}
; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
%a = load i16, i16 addrspace(1)* %in
%b = load i16, i16 addrspace(1)* %b_ptr
@@ -69,7 +69,7 @@ define void @test_sub_i16(i16 addrspace(
; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
@@ -85,7 +85,7 @@ define void @test_sub_v2i16(<2 x i16> ad
; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
@@ -103,7 +103,7 @@ define void @test_sub_v4i16(<4 x i16> ad
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
-define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
%result = sub i64 %a, %b
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -118,7 +118,7 @@ define void @s_sub_i64(i64 addrspace(1)*
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
-define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
+define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
@@ -134,7 +134,7 @@ define void @v_sub_i64(i64 addrspace(1)*
; SI: v_subb_u32_e32
; SI: v_sub_i32_e32
; SI: v_subb_u32_e32
-define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
+define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
@@ -154,7 +154,7 @@ define void @v_test_sub_v2i64(<2 x i64>
; SI: v_subb_u32_e32
; SI: v_subrev_i32_e32
; SI: v_subb_u32_e32
-define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
+define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid