[llvm] fb1d166 - AMDGPU: Bulk update some generic intrinsic tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 29 15:23:24 PST 2022
Author: Matt Arsenault
Date: 2022-11-29T18:09:59-05:00
New Revision: fb1d166e1d05272c569ed0502befd929fb78dffc
URL: https://github.com/llvm/llvm-project/commit/fb1d166e1d05272c569ed0502befd929fb78dffc
DIFF: https://github.com/llvm/llvm-project/commit/fb1d166e1d05272c569ed0502befd929fb78dffc.diff
LOG: AMDGPU: Bulk update some generic intrinsic tests to opaque pointers
Done purely with the automated opaque pointer test conversion script.
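
The rewrite is mechanical: every typed pointer such as "half addrspace(1)*" becomes the opaque "ptr addrspace(1)", dropping the pointee type while keeping the address space. No information is lost, since loads, stores, and intrinsic signatures already spell out the value type explicitly. A minimal before/after sketch of the transformation, illustrative rather than copied from any one file in the diff below:

  ; before: typed pointers
  %v = load half, half addrspace(1)* %a
  store half %v, half addrspace(1)* %r

  ; after: opaque pointers
  %v = load half, ptr addrspace(1) %a
  store half %v, ptr addrspace(1) %r

One side effect visible in llvm.memcpy.ll: pointer bitcasts become no-ops and are deleted outright, and the memcpy intrinsics are remangled without pointee types (e.g. @llvm.memcpy.p3i8.p3i8.i32 becomes @llvm.memcpy.p3.p3.i32).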
Added:
Modified:
llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.cos.ll
llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.log.ll
llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.log10.ll
llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.log2.ll
llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
llvm/test/CodeGen/AMDGPU/llvm.rint.ll
llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
llvm/test/CodeGen/AMDGPU/llvm.round.ll
llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.sin.ll
llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
index 49f100f112fe..ac287359b211 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @ceil_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.ceil.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,11 +43,11 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @ceil_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.ceil.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
index eaf632db7872..23e11c5bf393 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
@@ -5,7 +5,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) {
+define amdgpu_kernel void @cos_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
; GFX6-LABEL: cos_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -81,13 +81,13 @@ define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.cos.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
-define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) {
+define amdgpu_kernel void @cos_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
; GFX6-LABEL: cos_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -190,9 +190,9 @@ define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.cos.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll
index bd89502d7b82..c5dc863e2bcf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll
@@ -11,9 +11,9 @@
;SI: v_cos_f32
;SI-NOT: v_cos_f32
-define amdgpu_kernel void @test(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, float %x) #1 {
%cos = call float @llvm.cos.f32(float %x)
- store float %cos, float addrspace(1)* %out
+ store float %cos, ptr addrspace(1) %out
ret void
}
@@ -29,9 +29,9 @@ define amdgpu_kernel void @test(float addrspace(1)* %out, float %x) #1 {
;SI: v_cos_f32
;SI-NOT: v_cos_f32
-define amdgpu_kernel void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
+define amdgpu_kernel void @testv(ptr addrspace(1) %out, <4 x float> inreg %vx) #1 {
%cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
- store <4 x float> %cos, <4 x float> addrspace(1)* %out
+ store <4 x float> %cos, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
index 697fdfb3695e..d8358db7e96b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -9,10 +9,10 @@
; GCN: flat_store_dword
; GCN: s_endpgm
-define amdgpu_kernel void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 {
+define amdgpu_kernel void @test_debug_value(ptr addrspace(1) nocapture %globalptr_arg) #0 !dbg !4 {
entry:
- tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, metadata !10, metadata !13), !dbg !14
- store i32 123, i32 addrspace(1)* %globalptr_arg, align 4
+ tail call void @llvm.dbg.value(metadata ptr addrspace(1) %globalptr_arg, metadata !10, metadata !13), !dbg !14
+ store i32 123, ptr addrspace(1) %globalptr_arg, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
index 8691d21778b8..01e58ecb1f20 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @exp2_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.exp2.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,11 +43,11 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @exp2_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.exp2.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
index aa0e1e325039..6a4fefb30720 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
@@ -11,10 +11,10 @@
;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;SI: v_exp_f32
-define amdgpu_kernel void @test(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, float %in) {
entry:
%0 = call float @llvm.exp2.f32(float %in)
- store float %0, float addrspace(1)* %out
+ store float %0, ptr addrspace(1) %out
ret void
}
@@ -34,10 +34,10 @@ entry:
;SI: v_exp_f32
;SI: v_exp_f32
-define amdgpu_kernel void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @testv2(ptr addrspace(1) %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ store <2 x float> %0, ptr addrspace(1) %out
ret void
}
@@ -68,10 +68,10 @@ entry:
;SI: v_exp_f32
;SI: v_exp_f32
;SI: v_exp_f32
-define amdgpu_kernel void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @testv4(ptr addrspace(1) %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+ store <4 x float> %0, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
index 081dc6173279..e5c927b2acac 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @floor_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.floor.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,11 +43,11 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @floor_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.floor.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
index d4b2d11277e4..d51b4ffd5379 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
@@ -19,15 +19,15 @@ declare <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %b,
- half addrspace(1)* %c) {
- %a.val = load half, half addrspace(1)* %a
- %b.val = load half, half addrspace(1)* %b
- %c.val = load half, half addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %a.val = load half, ptr addrspace(1) %a
+ %b.val = load half, ptr addrspace(1) %b
+ %c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.fma.f16(half %a.val, half %b.val, half %c.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -45,13 +45,13 @@ define amdgpu_kernel void @fma_f16(
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16_imm_a(
- half addrspace(1)* %r,
- half addrspace(1)* %b,
- half addrspace(1)* %c) {
- %b.val = load half, half addrspace(1)* %b
- %c.val = load half, half addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %b.val = load half, ptr addrspace(1) %b
+ %c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.fma.f16(half 3.0, half %b.val, half %c.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -68,13 +68,13 @@ define amdgpu_kernel void @fma_f16_imm_a(
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16_imm_b(
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %c) {
- %a.val = load half, half addrspace(1)* %a
- %c.val = load half, half addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %c) {
+ %a.val = load half, ptr addrspace(1) %a
+ %c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.fma.f16(half %a.val, half 3.0, half %c.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -91,13 +91,13 @@ define amdgpu_kernel void @fma_f16_imm_b(
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_f16_imm_c(
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %b) {
- %a.val = load half, half addrspace(1)* %a
- %b.val = load half, half addrspace(1)* %b
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) {
+ %a.val = load half, ptr addrspace(1) %a
+ %b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.fma.f16(half %a.val, half %b.val, half 3.0)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -137,15 +137,15 @@ define amdgpu_kernel void @fma_f16_imm_c(
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b,
- <2 x half> addrspace(1)* %c) {
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
- %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %a.val = load <2 x half>, ptr addrspace(1) %a
+ %b.val = load <2 x half>, ptr addrspace(1) %b
+ %c.val = load <2 x half>, ptr addrspace(1) %c
%r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -184,13 +184,13 @@ define amdgpu_kernel void @fma_v2f16(
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16_imm_a(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %b,
- <2 x half> addrspace(1)* %c) {
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
- %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %b.val = load <2 x half>, ptr addrspace(1) %b
+ %c.val = load <2 x half>, ptr addrspace(1) %c
%r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> <half 3.0, half 3.0>, <2 x half> %b.val, <2 x half> %c.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -230,13 +230,13 @@ define amdgpu_kernel void @fma_v2f16_imm_a(
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16_imm_b(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %c) {
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
- %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %c) {
+ %a.val = load <2 x half>, ptr addrspace(1) %a
+ %c.val = load <2 x half>, ptr addrspace(1) %c
%r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> <half 3.0, half 3.0>, <2 x half> %c.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -280,13 +280,13 @@ define amdgpu_kernel void @fma_v2f16_imm_b(
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fma_v2f16_imm_c(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b) {
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) {
+ %a.val = load <2 x half>, ptr addrspace(1) %a
+ %b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.fma.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> <half 3.0, half 3.0>)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -349,14 +349,14 @@ define amdgpu_kernel void @fma_v2f16_imm_c(
; GCN: s_endpgm
define amdgpu_kernel void @fma_v4f16(
- <4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %a,
- <4 x half> addrspace(1)* %b,
- <4 x half> addrspace(1)* %c) {
- %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
- %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
- %c.val = load <4 x half>, <4 x half> addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %a.val = load <4 x half>, ptr addrspace(1) %a
+ %b.val = load <4 x half>, ptr addrspace(1) %b
+ %c.val = load <4 x half>, ptr addrspace(1) %c
%r.val = call <4 x half> @llvm.fma.v4f16(<4 x half> %a.val, <4 x half> %b.val, <4 x half> %c.val)
- store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+ store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index 183b26bad7a7..017bc0cb2b0a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -36,15 +36,15 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half>
; GCN: s_endpgm
define amdgpu_kernel void @fmuladd_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %b,
- half addrspace(1)* %c) {
- %a.val = load half, half addrspace(1)* %a
- %b.val = load half, half addrspace(1)* %b
- %c.val = load half, half addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %a.val = load half, ptr addrspace(1) %a
+ %b.val = load half, ptr addrspace(1) %b
+ %c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.fmuladd.f16(half %a.val, half %b.val, half %c.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -73,13 +73,13 @@ define amdgpu_kernel void @fmuladd_f16(
; GCN: s_endpgm
define amdgpu_kernel void @fmuladd_f16_imm_a(
- half addrspace(1)* %r,
- half addrspace(1)* %b,
- half addrspace(1)* %c) {
- %b.val = load volatile half, half addrspace(1)* %b
- %c.val = load volatile half, half addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %b.val = load volatile half, ptr addrspace(1) %b
+ %c.val = load volatile half, ptr addrspace(1) %c
%r.val = call half @llvm.fmuladd.f16(half 3.0, half %b.val, half %c.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -108,13 +108,13 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; GCN: s_endpgm
define amdgpu_kernel void @fmuladd_f16_imm_b(
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %c) {
- %a.val = load volatile half, half addrspace(1)* %a
- %c.val = load volatile half, half addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %c) {
+ %a.val = load volatile half, ptr addrspace(1) %a
+ %c.val = load volatile half, ptr addrspace(1) %c
%r.val = call half @llvm.fmuladd.f16(half %a.val, half 3.0, half %c.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -176,14 +176,14 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; GCN: buffer_store_{{dword|b32}} v[[R_V2_F16]]
define amdgpu_kernel void @fmuladd_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b,
- <2 x half> addrspace(1)* %c) {
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
- %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
+ %a.val = load <2 x half>, ptr addrspace(1) %a
+ %b.val = load <2 x half>, ptr addrspace(1) %b
+ %c.val = load <2 x half>, ptr addrspace(1) %c
%r.val = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index b4787f3eefe5..6fdfc99266a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -3,7 +3,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
-define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
; CHECK-LABEL: test_fptrunc_round_upward:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -11,11 +11,11 @@ define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32>
; CHECK-NEXT: global_store_short v[6:7], v0, off
; CHECK-NEXT: s_endpgm
%res = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
- store half %res, half addrspace(1)* %out, align 4
+ store half %res, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
; CHECK-LABEL: test_fptrunc_round_downward:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
@@ -23,11 +23,11 @@ define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i3
; CHECK-NEXT: global_store_short v[6:7], v0, off
; CHECK-NEXT: s_endpgm
%res = call half @llvm.fptrunc.round(float %a, metadata !"round.downward")
- store half %res, half addrspace(1)* %out, align 4
+ store half %res, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
@@ -45,7 +45,7 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float
%res3 = call half @llvm.fptrunc.round(float %b, metadata !"round.downward")
%res4 = fadd half %res1, %res2
%res5 = fadd half %res3, %res4
- store half %res5, half addrspace(1)* %out, align 4
+ store half %res5, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll
index 4a14e556c706..0a8faad183bf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.f16.ll
@@ -19,12 +19,12 @@ declare <2 x half> @llvm.log.v2f16(<2 x half> %a)
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]]
; GFX9: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]]
define void @log_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.log.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -58,11 +58,11 @@ entry:
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]]
define void @log_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.log.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index 7c6ef1c08077..0867b7ed0f82 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -11,10 +11,10 @@
; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
; GCN: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}}
-define void @test(float addrspace(1)* %out, float %in) {
+define void @test(ptr addrspace(1) %out, float %in) {
entry:
%res = call float @llvm.log.f32(float %in)
- store float %res, float addrspace(1)* %out
+ store float %res, ptr addrspace(1) %out
ret void
}
@@ -35,10 +35,10 @@ entry:
; GCN-DAG: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}}
-define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define void @testv2(ptr addrspace(1) %out, <2 x float> %in) {
entry:
%res = call <2 x float> @llvm.log.v2f32(<2 x float> %in)
- store <2 x float> %res, <2 x float> addrspace(1)* %out
+ store <2 x float> %res, ptr addrspace(1) %out
ret void
}
@@ -73,10 +73,10 @@ entry:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f317218, v{{[0-9]+}}
-define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define void @testv4(ptr addrspace(1) %out, <4 x float> %in) {
entry:
%res = call <4 x float> @llvm.log.v4f32(<4 x float> %in)
- store <4 x float> %res, <4 x float> addrspace(1)* %out
+ store <4 x float> %res, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll
index c50552aacefe..a4083d281e4c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.f16.ll
@@ -19,12 +19,12 @@ declare <2 x half> @llvm.log10.v2f16(<2 x half> %a)
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]]
; GFX9: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R_F16_0]]
define void @log10_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.log10.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -59,11 +59,11 @@ entry:
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[R_F32_5]]
define void @log10_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.log10.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index e2e55a412b8c..d67a71b61386 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -11,10 +11,10 @@
; CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
; GCN: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}}
-define void @test(float addrspace(1)* %out, float %in) {
+define void @test(ptr addrspace(1) %out, float %in) {
entry:
%res = call float @llvm.log10.f32(float %in)
- store float %res, float addrspace(1)* %out
+ store float %res, ptr addrspace(1) %out
ret void
}
@@ -35,10 +35,10 @@ entry:
; GCN-DAG: v_log_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}}
-define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define void @testv2(ptr addrspace(1) %out, <2 x float> %in) {
entry:
%res = call <2 x float> @llvm.log10.v2f32(<2 x float> %in)
- store <2 x float> %res, <2 x float> addrspace(1)* %out
+ store <2 x float> %res, ptr addrspace(1) %out
ret void
}
@@ -73,10 +73,10 @@ entry:
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3e9a209a, v{{[0-9]+}}
-define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define void @testv4(ptr addrspace(1) %out, <4 x float> %in) {
entry:
%res = call <4 x float> @llvm.log10.v4f32(<4 x float> %in)
- store <4 x float> %res, <4 x float> addrspace(1)* %out
+ store <4 x float> %res, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll
index 7228c4044c7a..85b5e3c9e42f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.log2.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @log2_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.log2.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,11 +43,11 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @log2_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.log2.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index bb97a2b26327..87022996755a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -11,10 +11,10 @@
;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;SI: v_log_f32
-define amdgpu_kernel void @test(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, float %in) {
entry:
%0 = call float @llvm.log2.f32(float %in)
- store float %0, float addrspace(1)* %out
+ store float %0, ptr addrspace(1) %out
ret void
}
@@ -34,10 +34,10 @@ entry:
;SI: v_log_f32
;SI: v_log_f32
-define amdgpu_kernel void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @testv2(ptr addrspace(1) %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ store <2 x float> %0, ptr addrspace(1) %out
ret void
}
@@ -68,10 +68,10 @@ entry:
;SI: v_log_f32
;SI: v_log_f32
;SI: v_log_f32
-define amdgpu_kernel void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @testv4(ptr addrspace(1) %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+ store <4 x float> %0, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index 0271fa40c0fb..169914757e39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -142,14 +142,14 @@ define amdgpu_kernel void @maxnum_f16(
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load volatile half, half addrspace(1)* %a
- %b.val = load volatile half, half addrspace(1)* %b
+ %a.val = load volatile half, ptr addrspace(1) %a
+ %b.val = load volatile half, ptr addrspace(1) %b
%r.val = call half @llvm.maxnum.f16(half %a.val, half %b.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -252,12 +252,12 @@ define amdgpu_kernel void @maxnum_f16_imm_a(
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) #0 {
entry:
- %b.val = load half, half addrspace(1)* %b
+ %b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -360,12 +360,12 @@ define amdgpu_kernel void @maxnum_f16_imm_b(
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -476,14 +476,14 @@ define amdgpu_kernel void @maxnum_v2f16(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ %a.val = load <2 x half>, ptr addrspace(1) %a
+ %b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> %b.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -569,12 +569,12 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) #0 {
entry:
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ %b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -660,12 +660,12 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -801,14 +801,14 @@ define amdgpu_kernel void @maxnum_v3f16(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <3 x half> addrspace(1)* %r,
- <3 x half> addrspace(1)* %a,
- <3 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
- %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
+ %a.val = load <3 x half>, ptr addrspace(1) %a
+ %b.val = load <3 x half>, ptr addrspace(1) %b
%r.val = call <3 x half> @llvm.maxnum.v3f16(<3 x half> %a.val, <3 x half> %b.val)
- store <3 x half> %r.val, <3 x half> addrspace(1)* %r
+ store <3 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -955,14 +955,14 @@ define amdgpu_kernel void @maxnum_v4f16(
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %a,
- <4 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
- %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
+ %a.val = load <4 x half>, ptr addrspace(1) %a
+ %b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a.val, <4 x half> %b.val)
- store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+ store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -1077,12 +1077,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) #0 {
entry:
- %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
+ %b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
- store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+ store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
index f8b331cf2c99..25194c7bf522 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
@@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(4)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i64, i1) nounwind
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
@@ -80,10 +80,8 @@ declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace
; SI-DAG: ds_write_b8
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) %out, ptr addrspace(3) %in, i32 32, i1 false) nounwind
ret void
}
@@ -125,10 +123,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace
; SI-DAG: ds_write_b16
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 2 %out, ptr addrspace(3) align 2 %in, i32 32, i1 false) nounwind
ret void
}
@@ -144,10 +140,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace
; SI: ds_write2_b32
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 4 %out, ptr addrspace(3) align 4 %in, i32 32, i1 false) nounwind
ret void
}
@@ -161,10 +155,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace
; SI: ds_write2_b64
; SI-DAG: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
- %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
- call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
+ call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 8 %out, ptr addrspace(3) align 8 %in, i32 32, i1 false) nounwind
ret void
}
@@ -238,10 +230,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace
; SI-DAG: buffer_store_byte
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 32, i1 false) nounwind
ret void
}
@@ -281,10 +271,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 add
; SI-DAG: buffer_store_short
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %out, ptr addrspace(1) align 2 %in, i64 32, i1 false) nounwind
ret void
}
@@ -294,10 +282,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 add
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %out, ptr addrspace(1) align 4 %in, i64 32, i1 false) nounwind
ret void
}
@@ -307,10 +293,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 add
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 8 %out, ptr addrspace(1) align 8 %in, i64 32, i1 false) nounwind
ret void
}
@@ -320,10 +304,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 add
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
-define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
- %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
- call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind
+define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 16 %out, ptr addrspace(1) align 16 %in, i64 32, i1 false) nounwind
ret void
}
@@ -340,9 +322,8 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad
; SI-DAG: s_load_dwordx2
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
-define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
- %str = bitcast [16 x i8] addrspace(4)* @hello.align4 to i8 addrspace(4)*
- call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(4)* align 4 %str, i64 32, i1 false)
+define amdgpu_kernel void @test_memcpy_const_string_align4(ptr addrspace(1) noalias %out) nounwind {
+ call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 @hello.align4, i64 32, i1 false)
ret void
}
@@ -365,8 +346,7 @@ define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noal
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
-define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
- %str = bitcast [16 x i8] addrspace(4)* @hello.align1 to i8 addrspace(4)*
- call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(4)* %str, i64 32, i1 false)
+define amdgpu_kernel void @test_memcpy_const_string_align1(ptr addrspace(1) noalias %out) nounwind {
+ call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) %out, ptr addrspace(4) @hello.align1, i64 32, i1 false)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 4de42687a58e..699ac770b1d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -141,14 +141,14 @@ define amdgpu_kernel void @minnum_f16_ieee(
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- half addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load volatile half, half addrspace(1)* %a
- %b.val = load volatile half, half addrspace(1)* %b
+ %a.val = load volatile half, ptr addrspace(1) %a
+ %b.val = load volatile half, ptr addrspace(1) %b
%r.val = call half @llvm.minnum.f16(half %a.val, half %b.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -278,12 +278,12 @@ define amdgpu_kernel void @minnum_f16_imm_a(
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) #0 {
entry:
- %b.val = load half, half addrspace(1)* %b
+ %b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.minnum.f16(half 3.0, half %b.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -385,12 +385,12 @@ define amdgpu_kernel void @minnum_f16_imm_b(
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.minnum.f16(half %a.val, half 4.0)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -500,14 +500,14 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a,
- <2 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ %a.val = load <2 x half>, ptr addrspace(1) %a
+ %b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> %b.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -627,12 +627,12 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) #0 {
entry:
- %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ %b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -717,12 +717,12 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -857,14 +857,14 @@ define amdgpu_kernel void @minnum_v3f16(
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <3 x half> addrspace(1)* %r,
- <3 x half> addrspace(1)* %a,
- <3 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
- %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
+ %a.val = load <3 x half>, ptr addrspace(1) %a
+ %b.val = load <3 x half>, ptr addrspace(1) %b
%r.val = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a.val, <3 x half> %b.val)
- store <3 x half> %r.val, <3 x half> addrspace(1)* %r
+ store <3 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -1010,14 +1010,14 @@ define amdgpu_kernel void @minnum_v4f16(
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %a,
- <4 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) #0 {
entry:
- %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
- %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
+ %a.val = load <4 x half>, ptr addrspace(1) %a
+ %b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a.val, <4 x half> %b.val)
- store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+ store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -1131,12 +1131,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- <4 x half> addrspace(1)* %r,
- <4 x half> addrspace(1)* %b) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) #0 {
entry:
- %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
+ %b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
- store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+ store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
index fdcceea353bc..8d8a525ab965 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
@@ -383,7 +383,7 @@ bb:
%mul = extractvalue { i64, i1 } %umulo, 0
%overflow = extractvalue { i64, i1 } %umulo, 1
%res = select i1 %overflow, i64 0, i64 %mul
- store i64 %res, i64 addrspace(1)* undef
+ store i64 %res, ptr addrspace(1) undef
ret void
}
@@ -567,7 +567,7 @@ bb:
%mul = extractvalue { i64, i1 } %umulo, 0
%overflow = extractvalue { i64, i1 } %umulo, 1
%res = select i1 %overflow, i64 0, i64 %mul
- store i64 %res, i64 addrspace(1)* undef
+ store i64 %res, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
index 657b14d3d1ed..9f2c193c3edc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
@@ -5,14 +5,14 @@
; GFX908: v_mul_legacy_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX90A: v_mul_legacy_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @mul_legacy(
- float addrspace(1)* %r,
- float addrspace(1)* %a,
- float addrspace(1)* %b) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) {
entry:
- %a.val = load volatile float, float addrspace(1)* %a
- %b.val = load volatile float, float addrspace(1)* %b
+ %a.val = load volatile float, ptr addrspace(1) %a
+ %b.val = load volatile float, ptr addrspace(1) %b
%r.val = call float @llvm.pow.f32(float %a.val, float %b.val)
- store float %r.val, float addrspace(1)* %r
+ store float %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
index 5b3d5137ac1d..8c82f53de19b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
@@ -14,12 +14,12 @@ declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @rint_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.rint.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -49,11 +49,11 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @rint_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.rint.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
index cfffaa0b442a..87ed9560053f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
@@ -11,20 +11,20 @@
; SI: v_cndmask_b32
; SI: v_cndmask_b32
; SI: s_endpgm
-define amdgpu_kernel void @rint_f64(double addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @rint_f64(ptr addrspace(1) %out, double %in) {
entry:
%0 = call double @llvm.rint.f64(double %in)
- store double %0, double addrspace(1)* %out
+ store double %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}rint_v2f64:
; CI: v_rndne_f64_e32
; CI: v_rndne_f64_e32
-define amdgpu_kernel void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @rint_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
entry:
%0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
- store <2 x double> %0, <2 x double> addrspace(1)* %out
+ store <2 x double> %0, ptr addrspace(1) %out
ret void
}
@@ -33,10 +33,10 @@ entry:
; CI: v_rndne_f64_e32
; CI: v_rndne_f64_e32
; CI: v_rndne_f64_e32
-define amdgpu_kernel void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @rint_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
entry:
%0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
- store <4 x double> %0, <4 x double> addrspace(1)* %out
+ store <4 x double> %0, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
index 4056bc39448d..cabb9dc8a432 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
@@ -6,10 +6,10 @@
; R600: RNDNE
; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @rint_f32(ptr addrspace(1) %out, float %in) {
entry:
%0 = call float @llvm.rint.f32(float %in) #0
- store float %0, float addrspace(1)* %out
+ store float %0, ptr addrspace(1) %out
ret void
}
@@ -19,10 +19,10 @@ entry:
; SI: v_rndne_f32_e32
; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @rint_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ store <2 x float> %0, ptr addrspace(1) %out
ret void
}
@@ -36,10 +36,10 @@ entry:
; SI: v_rndne_f32_e32
; SI: v_rndne_f32_e32
; SI: v_rndne_f32_e32
-define amdgpu_kernel void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @rint_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+ store <4 x float> %0, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index 8b21896d6460..4ed575df2fbb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=CI %s
-define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 {
+define amdgpu_kernel void @round_f64(ptr addrspace(1) %out, double %x) #0 {
; SI-LABEL: round_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -59,11 +59,11 @@ define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 {
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT: s_endpgm
%result = call double @llvm.round.f64(double %x) #1
- store double %result, double addrspace(1)* %out
+ store double %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; SI-LABEL: v_round_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -128,15 +128,15 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa
; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
- %x = load double, double addrspace(1)* %gep
+ %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+ %x = load double, ptr addrspace(1) %gep
%result = call double @llvm.round.f64(double %x) #1
- store double %result, double addrspace(1)* %out.gep
+ store double %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 {
+define amdgpu_kernel void @round_v2f64(ptr addrspace(1) %out, <2 x double> %in) #0 {
; SI-LABEL: round_v2f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -219,11 +219,11 @@ define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x dou
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: s_endpgm
%result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
- store <2 x double> %result, <2 x double> addrspace(1)* %out
+ store <2 x double> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 {
+define amdgpu_kernel void @round_v4f64(ptr addrspace(1) %out, <4 x double> %in) #0 {
; SI-LABEL: round_v4f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11
@@ -364,11 +364,11 @@ define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x dou
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: s_endpgm
%result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
- store <4 x double> %result, <4 x double> addrspace(1)* %out
+ store <4 x double> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
+define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in) #0 {
; SI-LABEL: round_v8f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19
@@ -625,7 +625,7 @@ define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x dou
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[20:23], 0
; CI-NEXT: s_endpgm
%result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
- store <8 x double> %result, <8 x double> addrspace(1)* %out
+ store <8 x double> %result, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index ae029f56400b..6525c02fa967 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -21,9 +21,9 @@
; R600-DAG: SETGE
; R600-DAG: CNDE
; R600-DAG: ADD
-define amdgpu_kernel void @round_f32(float addrspace(1)* %out, float %x) #0 {
+define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) #0 {
%result = call float @llvm.round.f32(float %x) #1
- store float %result, float addrspace(1)* %out
+ store float %result, ptr addrspace(1) %out
ret void
}
@@ -35,27 +35,27 @@ define amdgpu_kernel void @round_f32(float addrspace(1)* %out, float %x) #0 {
; FUNC-LABEL: {{^}}round_v2f32:
; GCN: s_endpgm
; R600: CF_END
-define amdgpu_kernel void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
+define amdgpu_kernel void @round_v2f32(ptr addrspace(1) %out, <2 x float> %in) #0 {
%result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
- store <2 x float> %result, <2 x float> addrspace(1)* %out
+ store <2 x float> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}round_v4f32:
; GCN: s_endpgm
; R600: CF_END
-define amdgpu_kernel void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
+define amdgpu_kernel void @round_v4f32(ptr addrspace(1) %out, <4 x float> %in) #0 {
%result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
- store <4 x float> %result, <4 x float> addrspace(1)* %out
+ store <4 x float> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}round_v8f32:
; GCN: s_endpgm
; R600: CF_END
-define amdgpu_kernel void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
+define amdgpu_kernel void @round_v8f32(ptr addrspace(1) %out, <8 x float> %in) #0 {
%result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
- store <8 x float> %result, <8 x float> addrspace(1)* %out
+ store <8 x float> %result, ptr addrspace(1) %out
ret void
}
@@ -72,11 +72,11 @@ define amdgpu_kernel void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x floa
; GFX89: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[COPYSIGN]]
; GFX89: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TRUNC]], [[SEL]]
; GFX89: buffer_store_short [[RESULT]]
-define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
+define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) #0 {
%x.arg.trunc = trunc i32 %x.arg to i16
%x = bitcast i16 %x.arg.trunc to half
%result = call half @llvm.round.f16(half %x) #1
- store half %result, half addrspace(1)* %out
+ store half %result, ptr addrspace(1) %out
ret void
}
@@ -88,10 +88,10 @@ define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 {
; GFX89: v_bfi_b32 [[COPYSIGN1:v[0-9]+]], [[K]], [[BFI_K]],
; GFX9: v_pack_b32_f16
-define amdgpu_kernel void @round_v2f16(<2 x half> addrspace(1)* %out, i32 %in.arg) #0 {
+define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
%in = bitcast i32 %in.arg to <2 x half>
%result = call <2 x half> @llvm.round.v2f16(<2 x half> %in)
- store <2 x half> %result, <2 x half> addrspace(1)* %out
+ store <2 x half> %result, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
index 983fccaf044f..2ad122cf8185 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
@@ -5,7 +5,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a) {
+define amdgpu_kernel void @sin_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
; GFX6-LABEL: sin_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -81,13 +81,13 @@ define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a)
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.sin.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
-define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) {
+define amdgpu_kernel void @sin_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
; GFX6-LABEL: sin_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -190,9 +190,9 @@ define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> add
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.sin.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 7f033a6c43a7..6431c3913dbc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,9 +16,9 @@
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
%sin = call float @llvm.sin.f32(float %x)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -29,10 +29,10 @@ define amdgpu_kernel void @sin_f32(float addrspace(1)* %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -44,10 +44,10 @@ define amdgpu_kernel void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -59,10 +59,10 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x)
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
%y = fmul reassoc float 3.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -73,10 +73,10 @@ define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -88,10 +88,10 @@ define amdgpu_kernel void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -103,10 +103,10 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x)
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
%y = fmul reassoc float 2.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -117,10 +117,10 @@ define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
%y = fmul float 0x401921FB60000000, %x
%sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -131,10 +131,10 @@ define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
%y = fmul float 0x401921FB60000000, %x
%sin = call float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -145,10 +145,10 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
%y = fmul reassoc float 0x401921FB60000000, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
- store float %sin, float addrspace(1)* %out
+ store float %sin, ptr addrspace(1) %out
ret void
}
@@ -164,9 +164,9 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x
; GCN: v_sin_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
- store <4 x float> %sin, <4 x float> addrspace(1)* %out
+ store <4 x float> %sin, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
index 1e00e09421d1..f90176b39218 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @sqrt_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.sqrt.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,11 +43,11 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @sqrt_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
index 1e9212e45b7b..c36d3379dc8e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
@@ -13,12 +13,12 @@ declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @trunc_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.trunc.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,11 +43,11 @@ entry:
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @trunc_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.trunc.v2f16(<2 x half> %a.val)
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}