[llvm] 6a91a5e - AMDGPU: Convert some cast tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits@lists.llvm.org
Tue Nov 29 16:05:03 PST 2022
Author: Matt Arsenault
Date: 2022-11-29T19:04:58-05:00
New Revision: 6a91a5e82647f206a31706586d201ecc638e9365
URL: https://github.com/llvm/llvm-project/commit/6a91a5e82647f206a31706586d201ecc638e9365
DIFF: https://github.com/llvm/llvm-project/commit/6a91a5e82647f206a31706586d201ecc638e9365.diff
LOG: AMDGPU: Convert some cast tests to opaque pointers
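Background, as a hedged illustration (the value names below are made up for this note, not taken from the commit): the opaque-pointer migration drops the pointee type from pointer types, so typed-pointer IR such as

    %val = load i32, i32 addrspace(1)* %ptr
    store i32 %val, i32 addrspace(1)* %out

becomes

    %val = load i32, ptr addrspace(1) %ptr
    store i32 %val, ptr addrspace(1) %out

keeping only the address space on the ptr; the test updates in the diff below follow this mechanical pattern.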
Added:
Modified:
llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll
llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll
llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
llvm/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
llvm/test/CodeGen/AMDGPU/anyext.ll
llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll
llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
llvm/test/CodeGen/AMDGPU/fpext.f16.ll
llvm/test/CodeGen/AMDGPU/fpext.ll
llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll
llvm/test/CodeGen/AMDGPU/setcc-sext.ll
llvm/test/CodeGen/AMDGPU/sext-eliminate.ll
llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
llvm/test/CodeGen/AMDGPU/sign_extend.ll
llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
index 6e3550b4c9bf..db5d39e3884b 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
@@ -7,40 +7,40 @@ declare void @consume_ptr2int(i32) #0
; CHECK-LABEL: @addrspacecast_captured(
; CHECK: %data = alloca i32, align 4, addrspace(5)
-; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32*
-; CHECK: %ptr2int = ptrtoint i32* %cast to i32
-; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out
-define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 {
+; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr
+; CHECK: %ptr2int = ptrtoint ptr %cast to i32
+; CHECK: store i32 %ptr2int, ptr addrspace(1) %out
+define amdgpu_kernel void @addrspacecast_captured(ptr addrspace(1) %out) #0 {
entry:
%data = alloca i32, align 4, addrspace(5)
- %cast = addrspacecast i32 addrspace(5)* %data to i32*
- %ptr2int = ptrtoint i32* %cast to i32
- store i32 %ptr2int, i32 addrspace(1)* %out
+ %cast = addrspacecast ptr addrspace(5) %data to ptr
+ %ptr2int = ptrtoint ptr %cast to i32
+ store i32 %ptr2int, ptr addrspace(1) %out
ret void
}
; CHECK-LABEL: @addrspacecast_captured_store(
; CHECK: %data = alloca i32, align 4, addrspace(5)
-; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32*
-; CHECK: store i32* %cast, i32* addrspace(1)* %out
-define amdgpu_kernel void @addrspacecast_captured_store(i32* addrspace(1)* %out) #0 {
+; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr
+; CHECK: store ptr %cast, ptr addrspace(1) %out
+define amdgpu_kernel void @addrspacecast_captured_store(ptr addrspace(1) %out) #0 {
entry:
%data = alloca i32, align 4, addrspace(5)
- %cast = addrspacecast i32 addrspace(5)* %data to i32*
- store i32* %cast, i32* addrspace(1)* %out
+ %cast = addrspacecast ptr addrspace(5) %data to ptr
+ store ptr %cast, ptr addrspace(1) %out
ret void
}
; CHECK-LABEL: @addrspacecast_captured_call(
; CHECK: %data = alloca i32, align 4, addrspace(5)
-; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32*
-; CHECK: %ptr2int = ptrtoint i32* %cast to i32
+; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr
+; CHECK: %ptr2int = ptrtoint ptr %cast to i32
; CHECK: call void @consume_ptr2int(i32 %ptr2int)
define amdgpu_kernel void @addrspacecast_captured_call() #0 {
entry:
%data = alloca i32, align 4, addrspace(5)
- %cast = addrspacecast i32 addrspace(5)* %data to i32*
- %ptr2int = ptrtoint i32* %cast to i32
+ %cast = addrspacecast ptr addrspace(5) %data to ptr
+ %ptr2int = ptrtoint ptr %cast to i32
call void @consume_ptr2int(i32 %ptr2int)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index b3be3702b3f5..66f249fe604b 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -2,7 +2,7 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
-declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
+declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0
@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@@ -19,179 +19,179 @@ declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrs
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
; HSA-SAME: () #[[ATTR1:[0-9]+]] {
-; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4
+; HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 4
; HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+ store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
ret void
}
define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
; AKF_HSA-SAME: () #[[ATTR1]] {
-; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
+; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
-; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
+ store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4))
ret void
}
define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
; AKF_HSA-SAME: () #[[ATTR1]] {
-; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
+; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
+ store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4))
ret void
}
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: () #[[ATTR1]] {
-; AKF_HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
+ store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8)
ret void
}
define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
; HSA-SAME: () #[[ATTR1]] {
-; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4
+; HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)), align 4
; HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
+ store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4))
ret void
}
define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
; HSA-SAME: () #[[ATTR1]] {
-; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+; HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8), align 4
; HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
+ store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8)
ret void
}
-define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
-; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
-; AKF_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
+; AKF_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
-; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
- store i32 %val, i32 addrspace(1)* %out
+ %val = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8)
+ store i32 %val, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
-; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
-; AKF_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
+; AKF_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
-; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
- store i32 %val, i32 addrspace(1)* %out
+ %val = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst
+ store i32 %val, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
-; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
; AKF_HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
-; AKF_HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
+; AKF_HSA-NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
; ATTRIBUTOR_HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
-; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
+ %val = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
%val0 = extractvalue { i32, i1 } %val, 0
- store i32 %val0, i32 addrspace(1)* %out
+ store i32 %val0, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
-; AKF_HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
; ATTRIBUTOR_HSA-NEXT: ret void
;
- call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
+ call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
ret void
}
; Can't just search the pointer value
-define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
+define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
-; AKF_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
-; AKF_HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
- store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
+ store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) %out
ret void
}
; Can't just search pointer types
-define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
-; AKF_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
-; AKF_HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
+ store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) %out
ret void
}
@@ -199,28 +199,28 @@ define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
; AKF_HSA-SAME: () #[[ATTR1]] {
-; AKF_HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
+; AKF_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
+; ATTRIBUTOR_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
- store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
+ store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
ret void
}
-define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
+define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
; AKF_HSA-SAME: () #[[ATTR1]] {
-; AKF_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
+; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
-; ATTRIBUTOR_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
+; ATTRIBUTOR_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
;
- ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
+ ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
}
attributes #0 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll
index 593e37f4c8c7..cbd3e6835e37 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll
@@ -1,7 +1,7 @@
; RUN: not --crash llc -march=amdgcn -verify-machineinstrs -amdgpu-enable-lower-module-lds=false < %s 2>&1 | FileCheck -check-prefix=ERROR %s
-; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*)
+; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4))
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
-@gv_flatptr_from_lds = unnamed_addr addrspace(2) global i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+@gv_flatptr_from_lds = unnamed_addr addrspace(2) global ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll
index 4f5082f9bd08..e22fcd84845a 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll
@@ -19,9 +19,9 @@
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
@constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4
-@gv_flatptr_from_global = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4
+@gv_flatptr_from_global = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(0)), i64 0, i64 8), align 4
-@gv_global_ptr = unnamed_addr addrspace(4) global i32 addrspace(1)* getelementptr ([256 x i32], [256 x i32] addrspace(1)* @global.arr, i64 0, i64 8), align 4
+@gv_global_ptr = unnamed_addr addrspace(4) global ptr addrspace(1) getelementptr ([256 x i32], ptr addrspace(1) @global.arr, i64 0, i64 8), align 4
-@gv_flatptr_from_constant = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(4)* @constant.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4
+@gv_flatptr_from_constant = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(4) @constant.arr to ptr addrspace(0)), i64 0, i64 8), align 4
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
index 2edaa889c1af..5de8a6f2430e 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
@@ -4,8 +4,8 @@
; Test that a null check is not emitted for lowered addrspacecast
-define void @flat_user(i8* %ptr) {
- store i8 0, i8* %ptr
+define void @flat_user(ptr %ptr) {
+ store i8 0, ptr %ptr
ret void
}
@@ -18,8 +18,8 @@ define void @flat_user(i8* %ptr) {
; CHECK-NOT: v1
define void @cast_alloca() {
%alloca = alloca i8, addrspace(5)
- %cast = addrspacecast i8 addrspace(5)* %alloca to i8*
- call void @flat_user(i8* %cast)
+ %cast = addrspacecast ptr addrspace(5) %alloca to ptr
+ call void @flat_user(ptr %cast)
ret void
}
@@ -33,8 +33,8 @@ define void @cast_alloca() {
; CHECK-NOT: v0
; CHECK-NOT: v1
define void @cast_lds_gv() {
- %cast = addrspacecast i8 addrspace(3)* @lds to i8*
- call void @flat_user(i8* %cast)
+ %cast = addrspacecast ptr addrspace(3) @lds to ptr
+ call void @flat_user(ptr %cast)
ret void
}
@@ -42,7 +42,7 @@ define void @cast_lds_gv() {
; CHECK: v_mov_b32_e32 v0, 0
; CHECK: v_mov_b32_e32 v1, 0
define void @cast_constant_lds_neg1_gv() {
- call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 -1 to i8 addrspace(3)*) to i8*))
+ call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr))
ret void
}
@@ -50,7 +50,7 @@ define void @cast_constant_lds_neg1_gv() {
; CHECK: v_mov_b32_e32 v0, 0
; CHECK: v_mov_b32_e32 v1, 0
define void @cast_constant_private_neg1_gv() {
- call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 -1 to i8 addrspace(5)*) to i8*))
+ call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr))
ret void
}
@@ -60,7 +60,7 @@ define void @cast_constant_private_neg1_gv() {
; CHECK: v_mov_b32_e32 v0, 0x7b
; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
define void @cast_constant_lds_other_gv() {
- call void @flat_user(i8* addrspacecast (i8 addrspace(3)* inttoptr (i32 123 to i8 addrspace(3)*) to i8*))
+ call void @flat_user(ptr addrspacecast (ptr addrspace(3) inttoptr (i32 123 to ptr addrspace(3)) to ptr))
ret void
}
@@ -70,6 +70,6 @@ define void @cast_constant_lds_other_gv() {
; CHECK: v_mov_b32_e32 v0, 0x7b
; CHECK: v_mov_b32_e32 v1, [[APERTURE]]
define void @cast_constant_private_other_gv() {
- call void @flat_user(i8* addrspacecast (i8 addrspace(5)* inttoptr (i32 123 to i8 addrspace(5)*) to i8*))
+ call void @flat_user(ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr))
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index f5760793828b..b44616678ac8 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -29,9 +29,9 @@
; number SGPR.
; HSA: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
- store volatile i32 7, i32* %stof
+define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(3) %ptr to ptr
+ store volatile i32 7, ptr %stof
ret void
}
@@ -54,9 +54,9 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt
; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
-define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
- store volatile i32 7, i32* %stof
+define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(3) %ptr to ptr
+ store volatile i32 7, ptr %stof
ret void
}
@@ -88,9 +88,9 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; HSA: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
- store volatile i32 7, i32* %stof
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(5) %ptr to ptr
+ store volatile i32 7, ptr %stof
ret void
}
@@ -103,9 +103,9 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
-define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
- store volatile i32 7, i32* %stof
+define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(1) %ptr to ptr
+ store volatile i32 7, ptr %stof
ret void
}
@@ -115,9 +115,9 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
-define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
- %ld = load volatile i32, i32* %stof
+define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(4) %ptr to ptr
+ %ld = load volatile i32, ptr %stof
ret void
}
@@ -129,9 +129,9 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)*
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]]
-define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
- %ld = load volatile i32, i32 addrspace(1)* %stof
+define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1)
+ %ld = load volatile i32, ptr addrspace(1) %stof
ret void
}
@@ -151,9 +151,9 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4)
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]]
; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]]
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
- %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
- store volatile i32 0, i32 addrspace(3)* %ftos
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
+ %ftos = addrspacecast ptr %ptr to ptr addrspace(3)
+ store volatile i32 0, ptr addrspace(3) %ftos
ret void
}
@@ -176,9 +176,9 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
- %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
- store volatile i32 0, i32 addrspace(5)* %ftos
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
+ %ftos = addrspacecast ptr %ptr to ptr addrspace(5)
+ store volatile i32 0, ptr addrspace(5) %ftos
ret void
}
@@ -193,9 +193,9 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
- %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
- store volatile i32 0, i32 addrspace(1)* %ftos
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
+ %ftos = addrspacecast ptr %ptr to ptr addrspace(1)
+ store volatile i32 0, ptr addrspace(1) %ftos
ret void
}
@@ -204,9 +204,9 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
- %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
- load volatile i32, i32 addrspace(4)* %ftos
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
+ %ftos = addrspacecast ptr %ptr to ptr addrspace(4)
+ load volatile i32, ptr addrspace(4) %ftos
ret void
}
@@ -223,8 +223,8 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(3)* null to i32*
- store volatile i32 7, i32* %cast
+ %cast = addrspacecast ptr addrspace(3) null to ptr
+ store volatile i32 7, ptr %cast
ret void
}
@@ -233,8 +233,8 @@ define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
- %cast = addrspacecast i32* null to i32 addrspace(3)*
- store volatile i32 7, i32 addrspace(3)* %cast
+ %cast = addrspacecast ptr null to ptr addrspace(3)
+ store volatile i32 7, ptr addrspace(3) %cast
ret void
}
@@ -244,8 +244,8 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
- store volatile i32 7, i32* %cast
+ %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
+ store volatile i32 7, ptr %cast
ret void
}
@@ -254,8 +254,8 @@ define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
- %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)*
- store volatile i32 7, i32 addrspace(3)* %cast
+ %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3)
+ store volatile i32 7, ptr addrspace(3) %cast
ret void
}
@@ -273,8 +273,8 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(5)* null to i32*
- store volatile i32 7, i32* %cast
+ %cast = addrspacecast ptr addrspace(5) null to ptr
+ store volatile i32 7, ptr %cast
ret void
}
@@ -283,8 +283,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
- %cast = addrspacecast i32* null to i32 addrspace(5)*
- store volatile i32 7, i32 addrspace(5)* %cast
+ %cast = addrspacecast ptr null to ptr addrspace(5)
+ store volatile i32 7, ptr addrspace(5) %cast
ret void
}
@@ -298,8 +298,8 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
- %cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32*
- store volatile i32 7, i32* %cast
+ %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
+ store volatile i32 7, ptr %cast
ret void
}
@@ -308,8 +308,8 @@ define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
- %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(5)*
- store volatile i32 7, i32 addrspace(5)* %cast
+ %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5)
+ store volatile i32 7, ptr addrspace(5) %cast
ret void
}
@@ -320,24 +320,24 @@ define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
-define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
+define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 {
entry:
%cmp = icmp ne i32 %c, 0
br i1 %cmp, label %local, label %global
local:
- %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32*
+ %flat_local = addrspacecast ptr addrspace(3) %lptr to ptr
br label %end
global:
- %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32*
+ %flat_global = addrspacecast ptr addrspace(1) %gptr to ptr
br label %end
end:
- %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ]
- store volatile i32 %x, i32* %fptr, align 4
-; %val = load i32, i32* %fptr, align 4
-; store i32 %val, i32 addrspace(1)* %out, align 4
+ %fptr = phi ptr [ %flat_local, %local ], [ %flat_global, %global ]
+ store volatile i32 %x, ptr %fptr, align 4
+; %val = load i32, ptr %fptr, align 4
+; store i32 %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -353,16 +353,16 @@ end:
; HSA: {{flat|global}}_store_dword
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
-define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
+define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4, addrspace(5)
%x = call i32 @llvm.amdgcn.workitem.id.x() #2
- %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x
- %fptr = addrspacecast i32 addrspace(5)* %pptr to i32*
- store volatile i32 %x, i32* %fptr
+ %pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x
+ %fptr = addrspacecast ptr addrspace(5) %pptr to ptr
+ store volatile i32 %x, ptr %fptr
; Dummy call
call void @llvm.amdgcn.s.barrier() #1
- %reload = load volatile i32, i32* %fptr, align 4
- store volatile i32 %reload, i32 addrspace(1)* %out, align 4
+ %reload = load volatile i32, ptr %fptr, align 4
+ store volatile i32 %reload, ptr addrspace(1) %out, align 4
ret void
}
@@ -373,12 +373,11 @@ define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i3
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
-define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace(4)* addrspace(4)* %ptr.ptr, i32 %offset) #0 {
- %ptr = load volatile i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %ptr.ptr
- %addrspacecast = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(6)*
- %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset
- %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)*
- %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4
+define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
+ %ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr
+ %addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6)
+ %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
+ %load = load volatile i32, ptr addrspace(6) %gep, align 4
ret void
}
@@ -389,12 +388,11 @@ define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
-define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1)* addrspace(4)* %ptr.ptr, i32 %offset) #0 {
- %ptr = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %ptr.ptr
- %addrspacecast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(6)*
- %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset
- %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)*
- %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4
+define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
+ %ptr = load volatile ptr addrspace(1), ptr addrspace(4) %ptr.ptr
+ %addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6)
+ %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
+ %load = load volatile i32, ptr addrspace(6) %gep, align 4
ret void
}
@@ -403,9 +401,9 @@ define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspace(6)* %ptr) #0 {
- %stof = addrspacecast i32 addrspace(6)* %ptr to i32*
- %load = load volatile i32, i32* %stof
+define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 {
+ %stof = addrspacecast ptr addrspace(6) %ptr to ptr
+ %load = load volatile i32, ptr %stof
ret void
}
@@ -414,9 +412,9 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspa
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(i32 addrspace(6)* %ptr) #3 {
- %stof = addrspacecast i32 addrspace(6)* %ptr to i32*
- %load = load volatile i32, i32* %stof
+define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 {
+ %stof = addrspacecast ptr addrspace(6) %ptr to ptr
+ %load = load volatile i32, ptr %stof
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
index 2e5c32cdbd52..dc7fc9f6d6a6 100644
--- a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
@@ -24,20 +24,17 @@
; GCN: {{buffer|flat}}_store_byte
; GCN: {{buffer|flat}}_store_byte
; GCN: {{buffer|flat}}_store_byte
-define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(<8 x i8> addrspace(1)* nocapture readonly %arg, <16 x i8> addrspace(1)* %arg1) local_unnamed_addr #0 {
+define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) %arg1) local_unnamed_addr #0 {
bb:
- %tmp = bitcast <8 x i8> addrspace(1)* %arg to <16 x i8> addrspace(1)*
- %tmp2 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp, align 16
+ %tmp2 = load <16 x i8>, ptr addrspace(1) %arg, align 16
%tmp3 = extractelement <16 x i8> %tmp2, i64 4
%tmp6 = extractelement <16 x i8> %tmp2, i64 11
- %tmp10 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %arg, i64 2
- %tmp11 = bitcast <8 x i8> addrspace(1)* %tmp10 to <16 x i8> addrspace(1)*
- %tmp12 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp11, align 16
+ %tmp10 = getelementptr inbounds <8 x i8>, ptr addrspace(1) %arg, i64 2
+ %tmp12 = load <16 x i8>, ptr addrspace(1) %tmp10, align 16
%tmp13 = extractelement <16 x i8> %tmp12, i64 7
%tmp17 = extractelement <16 x i8> %tmp12, i64 12
- %tmp21 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %arg, i64 4
- %tmp22 = bitcast <8 x i8> addrspace(1)* %tmp21 to <16 x i8> addrspace(1)*
- %tmp23 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp22, align 16
+ %tmp21 = getelementptr inbounds <8 x i8>, ptr addrspace(1) %arg, i64 4
+ %tmp23 = load <16 x i8>, ptr addrspace(1) %tmp21, align 16
%tmp24 = extractelement <16 x i8> %tmp23, i64 3
%tmp1 = insertelement <16 x i8> undef, i8 %tmp3, i32 2
%tmp4 = insertelement <16 x i8> %tmp1, i8 0, i32 3
@@ -50,7 +47,7 @@ bb:
%tmp16 = insertelement <16 x i8> %tmp15, i8 0, i32 10
%tmp18 = insertelement <16 x i8> %tmp16, i8 0, i32 11
%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp24, i32 12
- store <16 x i8> %tmp19, <16 x i8> addrspace(1)* %arg1, align 1
+ store <16 x i8> %tmp19, ptr addrspace(1) %arg1, align 1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll
index 34c35abe9369..68a8b0ae8dba 100644
--- a/llvm/test/CodeGen/AMDGPU/anyext.ll
+++ b/llvm/test/CodeGen/AMDGPU/anyext.ll
@@ -6,7 +6,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-define amdgpu_kernel void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) #0 {
+define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
; GCN-LABEL: anyext_i1_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -55,11 +55,11 @@ entry:
%tmp2 = xor i8 %tmp1, -1
%tmp3 = and i8 %tmp2, 1
%tmp4 = zext i8 %tmp3 to i32
- store i32 %tmp4, i32 addrspace(1)* %out
+ store i32 %tmp4, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) #0 {
+define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
; GCN-LABEL: s_anyext_i16_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -132,16 +132,16 @@ define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspac
entry:
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
- %a.ptr = getelementptr i16, i16 addrspace(1)* %a, i32 %tid.x
- %b.ptr = getelementptr i16, i16 addrspace(1)* %b, i32 %tid.y
- %a.l = load i16, i16 addrspace(1)* %a.ptr
- %b.l = load i16, i16 addrspace(1)* %b.ptr
+ %a.ptr = getelementptr i16, ptr addrspace(1) %a, i32 %tid.x
+ %b.ptr = getelementptr i16, ptr addrspace(1) %b, i32 %tid.y
+ %a.l = load i16, ptr addrspace(1) %a.ptr
+ %b.l = load i16, ptr addrspace(1) %b.ptr
%tmp = add i16 %a.l, %b.l
%tmp1 = trunc i16 %tmp to i8
%tmp2 = xor i8 %tmp1, -1
%tmp3 = and i8 %tmp2, 1
%tmp4 = zext i8 %tmp3 to i32
- store i32 %tmp4, i32 addrspace(1)* %out
+ store i32 %tmp4, ptr addrspace(1) %out
ret void
}
@@ -186,7 +186,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
bb:
- %tmp = load i16, i16 addrspace(1)* undef, align 2
+ %tmp = load i16, ptr addrspace(1) undef, align 2
%tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 1
%tmp4 = and <2 x i16> %tmp2, <i16 -32768, i16 -32768>
%tmp5 = zext <2 x i16> %tmp4 to <2 x i32>
@@ -196,7 +196,7 @@ bb:
%tmp10 = fcmp oeq <2 x float> %tmp8, zeroinitializer
%tmp11 = zext <2 x i1> %tmp10 to <2 x i8>
%tmp12 = extractelement <2 x i8> %tmp11, i32 1
- store i8 %tmp12, i8 addrspace(1)* undef, align 1
+ store i8 %tmp12, ptr addrspace(1) undef, align 1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll
index 155de5353bcb..31e1ace2bb11 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll
@@ -8,11 +8,11 @@
; SI-LLC-LABEL: {{^}}test:
; SI-LLC: s_mul_i32
; SI-LLC-NOT: mul
-define amdgpu_kernel void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
+define amdgpu_kernel void @test(ptr addrspace(1) nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3
%1 = sext i32 %0 to i64
- %2 = getelementptr i8, i8 addrspace(1)* %in, i64 %1
- store i8 %b, i8 addrspace(1)* %2
+ %2 = getelementptr i8, ptr addrspace(1) %in, i64 %1
+ store i8 %b, ptr addrspace(1) %2
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
index cd4ac4d58ad3..e85dfed89f39 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
@@ -7,16 +7,16 @@
; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}}
; GCN-NOT: v_and_b32_e32
-define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @and_i1_sext_bool(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
- %v = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x
+ %v = load i32, ptr addrspace(1) %gep, align 4
%cmp = icmp ugt i32 %x, %y
%ext = sext i1 %cmp to i32
%and = and i32 %v, %ext
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
index ce041364b76d..971f2e37e95e 100644
--- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
@@ -14,9 +14,9 @@ declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RES:T[0-9]+\.[XYZW]]]
; EGCM: VTX_READ_16 [[VAL:T[0-9]+\.[XYZW]]]
; EGCM: FLT16_TO_FLT32{{[ *]*}}[[RES]], [[VAL]]
-define amdgpu_kernel void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16, i16 addrspace(1)* %in, align 2
+define amdgpu_kernel void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load i16, ptr addrspace(1) %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
- store float %cvt, float addrspace(1)* %out, align 4
+ store float %cvt, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
index 70f0c0c1afdb..5bb68412b53b 100644
--- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
@@ -8,9 +8,9 @@ declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
; GCN: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; GCN: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16, i16 addrspace(1)* %in, align 2
+define amdgpu_kernel void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load i16, ptr addrspace(1) %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
- store double %cvt, double addrspace(1)* %out, align 4
+ store double %cvt, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
index 579a1454dd9a..1786fea95b54 100644
--- a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
@@ -12,9 +12,9 @@ declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; EG: MEM_RAT MSKOR
; EG: VTX_READ_32
; EG: FLT32_TO_FLT16
-define amdgpu_kernel void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float, float addrspace(1)* %in, align 4
+define amdgpu_kernel void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %val = load float, ptr addrspace(1) %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
- store i16 %cvt, i16 addrspace(1)* %out, align 2
+ store i16 %cvt, ptr addrspace(1) %out, align 2
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
index a602a0403758..2c318d7afb28 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
@@ -6,18 +6,18 @@ declare double @llvm.fabs.f64(double) #1
; FUNC-LABEL: @fp_to_sint_f64_i32
; SI: v_cvt_i32_f64_e32
-define amdgpu_kernel void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @fp_to_sint_f64_i32(ptr addrspace(1) %out, double %in) {
%result = fptosi double %in to i32
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
-define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %in) {
%result = fptosi <2 x double> %in to <2 x i32>
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -26,9 +26,9 @@ define amdgpu_kernel void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out,
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
-define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %in) {
%result = fptosi <4 x double> %in to <4 x i32>
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -47,29 +47,29 @@ define amdgpu_kernel void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out,
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
+define amdgpu_kernel void @fp_to_sint_i64_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %val = load double, double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid
+ %val = load double, ptr addrspace(1) %gep, align 8
%cast = fptosi double %val to i64
- store i64 %cast, i64 addrspace(1)* %out, align 8
+ store i64 %cast, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_sint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
%conv = fptosi double %in to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
%in.fabs = call double @llvm.fabs.f64(double %in)
%conv = fptosi double %in.fabs to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
index 18b0fe3084c9..e32d5d773058 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
@@ -5,7 +5,7 @@
declare float @llvm.fabs.f32(float) #1
-define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_sint_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -40,11 +40,11 @@ define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptosi float %in to i32
- store i32 %conv, i32 addrspace(1)* %out
+ store i32 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fp_to_sint_i32_fabs(ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_sint_i32_fabs:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -80,11 +80,11 @@ define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%in.fabs = call float @llvm.fabs.f32(float %in)
%conv = fptosi float %in.fabs to i32
- store i32 %conv, i32 addrspace(1)* %out
+ store i32 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @fp_to_sint_v2i32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-LABEL: fp_to_sint_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -125,11 +125,11 @@ define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x f
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = fptosi <2 x float> %in to <2 x i32>
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define amdgpu_kernel void @fp_to_sint_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: fp_to_sint_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -183,14 +183,14 @@ define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x f
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %value = load <4 x float>, <4 x float> addrspace(1) * %in
+ %value = load <4 x float>, ptr addrspace(1) %in
%result = fptosi <4 x float> %value to <4 x i32>
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; Check that the compiler doesn't crash with a "cannot select" error
-define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_sint_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -287,11 +287,11 @@ define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = fptosi float %in to i64
- store i64 %0, i64 addrspace(1)* %out
+ store i64 %0, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
+define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %x) {
; SI-LABEL: fp_to_sint_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -446,11 +446,11 @@ define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x f
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptosi <2 x float> %x to <2 x i64>
- store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
+define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %x) {
; SI-LABEL: fp_to_sint_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -733,11 +733,11 @@ define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x f
; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptosi <4 x float> %x to <4 x i64>
- store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -783,11 +783,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptosi float %in to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -834,11 +834,11 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, floa
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%in.fabs = call float @llvm.fabs.f32(float %in)
%conv = fptosi float %in.fabs to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @fp_to_sint_f32_i16(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_sint_f32_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -883,7 +883,7 @@ define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in)
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%sint = fptosi float %in to i16
- store i16 %sint, i16 addrspace(1)* %out
+ store i16 %sint, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
index 8a86446472ed..ba23dd03af73 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
@@ -6,18 +6,18 @@ declare double @llvm.fabs.f64(double) #1
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
; SI: v_cvt_u32_f64_e32
-define amdgpu_kernel void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @fp_to_uint_i32_f64(ptr addrspace(1) %out, double %in) {
%cast = fptoui double %in to i32
- store i32 %cast, i32 addrspace(1)* %out, align 4
+ store i32 %cast, ptr addrspace(1) %out, align 4
ret void
}
; SI-LABEL: @fp_to_uint_v2i32_v2f64
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
-define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
%cast = fptoui <2 x double> %in to <2 x i32>
- store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %cast, ptr addrspace(1) %out, align 8
ret void
}
@@ -26,9 +26,9 @@ define amdgpu_kernel void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out,
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
-define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
%cast = fptoui <4 x double> %in to <4 x i32>
- store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8
+ store <4 x i32> %cast, ptr addrspace(1) %out, align 8
ret void
}
@@ -47,43 +47,43 @@ define amdgpu_kernel void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out,
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
+define amdgpu_kernel void @fp_to_uint_i64_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %val = load double, double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid
+ %val = load double, ptr addrspace(1) %gep, align 8
%cast = fptoui double %val to i64
- store i64 %cast, i64 addrspace(1)* %out, align 4
+ store i64 %cast, ptr addrspace(1) %out, align 4
ret void
}
; SI-LABEL: @fp_to_uint_v2i64_v2f64
-define amdgpu_kernel void @fp_to_uint_v2i64_v2f64(<2 x i64> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @fp_to_uint_v2i64_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
%cast = fptoui <2 x double> %in to <2 x i64>
- store <2 x i64> %cast, <2 x i64> addrspace(1)* %out, align 16
+ store <2 x i64> %cast, ptr addrspace(1) %out, align 16
ret void
}
; SI-LABEL: @fp_to_uint_v4i64_v4f64
-define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
%cast = fptoui <4 x double> %in to <4 x i64>
- store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32
+ store <4 x i64> %cast, ptr addrspace(1) %out, align 32
ret void
}
; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_uint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
%conv = fptoui double %in to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
%in.fabs = call double @llvm.fabs.f64(double %in)
%conv = fptoui double %in.fabs to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
index ab90330e15c2..ca64382d9c8c 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
@@ -5,7 +5,7 @@
declare float @llvm.fabs.f32(float) #1
-define amdgpu_kernel void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fp_to_uint_f32_to_i32 (ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_uint_f32_to_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -40,11 +40,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float
; EG-NEXT: FLT_TO_UINT * T1.X, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptoui float %in to i32
- store i32 %conv, i32 addrspace(1)* %out
+ store i32 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-LABEL: fp_to_uint_v2f32_to_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -85,11 +85,11 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %ou
; EG-NEXT: FLT_TO_UINT * T0.X, T1.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = fptoui <2 x float> %in to <2 x i32>
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: fp_to_uint_v4f32_to_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -143,13 +143,13 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %ou
; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
; EG-NEXT: FLT_TO_UINT * T0.X, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %value = load <4 x float>, <4 x float> addrspace(1) * %in
+ %value = load <4 x float>, ptr addrspace(1) %in
%result = fptoui <4 x float> %value to <4 x i32>
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) {
+define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x) {
; SI-LABEL: fp_to_uint_f32_to_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -233,11 +233,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptoui float %x to i64
- store i64 %conv, i64 addrspace(1)* %out
+ store i64 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
+define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x float> %x) {
; SI-LABEL: fp_to_uint_v2f32_to_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -370,11 +370,11 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %ou
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptoui <2 x float> %x to <2 x i64>
- store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
+define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x float> %x) {
; SI-LABEL: fp_to_uint_v4f32_to_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -615,11 +615,11 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %ou
; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptoui <4 x float> %x to <4 x i64>
- store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -665,11 +665,11 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptoui float %in to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -716,11 +716,11 @@ define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, floa
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%in.fabs = call float @llvm.fabs.f32(float %in)
%conv = fptoui float %in.fabs to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -764,7 +764,7 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i16(i16 addrspace(1)* %out, float %
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%uint = fptoui float %in to i16
- store i16 %uint, i16 addrspace(1)* %out
+ store i16 %uint, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
index 91dac92fb0b9..0572f9a3270d 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
@@ -8,12 +8,12 @@
; GCN: buffer_store_dword v[[R_F32]]
; GCN: s_endpgm
define amdgpu_kernel void @fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fpext half %a.val to float
- store float %r.val, float addrspace(1)* %r
+ store float %r.val, ptr addrspace(1) %r
ret void
}
@@ -24,12 +24,12 @@ entry:
; GCN: buffer_store_dwordx2 v[[[R_F64_0]]:[[R_F64_1]]]
; GCN: s_endpgm
define amdgpu_kernel void @fpext_f16_to_f64(
- double addrspace(1)* %r,
- half addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fpext half %a.val to double
- store double %r.val, double addrspace(1)* %r
+ store double %r.val, ptr addrspace(1) %r
ret void
}
@@ -43,12 +43,12 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @fpext_v2f16_to_v2f32(
- <2 x float> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) #0 {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) #0 {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fpext <2 x half> %a.val to <2 x float>
- store <2 x float> %r.val, <2 x float> addrspace(1)* %r
+ store <2 x float> %r.val, ptr addrspace(1) %r
ret void
}
@@ -65,23 +65,23 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @fpext_v2f16_to_v2f64(
- <2 x double> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fpext <2 x half> %a.val to <2 x double>
- store <2 x double> %r.val, <2 x double> addrspace(1)* %r
+ store <2 x double> %r.val, ptr addrspace(1) %r
ret void
}
; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32:
; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) {
+define amdgpu_kernel void @s_fneg_fpext_f16_to_f32(ptr addrspace(1) %r, i32 %a) {
entry:
%a.trunc = trunc i32 %a to i16
%a.val = bitcast i16 %a.trunc to half
%r.val = fpext half %a.val to float
- store float %r.val, float addrspace(1)* %r
+ store float %r.val, ptr addrspace(1) %r
ret void
}
@@ -89,13 +89,13 @@ entry:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]]
define amdgpu_kernel void @fneg_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.neg = fsub half -0.0, %a.val
%r.val = fpext half %a.neg to float
- store float %r.val, float addrspace(1)* %r
+ store float %r.val, ptr addrspace(1) %r
ret void
}
@@ -103,13 +103,13 @@ entry:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]|
define amdgpu_kernel void @fabs_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%r.val = fpext half %a.fabs to float
- store float %r.val, float addrspace(1)* %r
+ store float %r.val, ptr addrspace(1) %r
ret void
}
@@ -117,14 +117,14 @@ entry:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]|
define amdgpu_kernel void @fneg_fabs_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%a.fneg.fabs = fsub half -0.0, %a.fabs
%r.val = fpext half %a.fneg.fabs to float
- store float %r.val, float addrspace(1)* %r
+ store float %r.val, ptr addrspace(1) %r
ret void
}
@@ -139,14 +139,14 @@ entry:
; GCN: store_dword [[CVT]]
; GCN: store_short [[XOR]]
define amdgpu_kernel void @fneg_multi_use_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.neg = fsub half -0.0, %a.val
%r.val = fpext half %a.neg to float
- store volatile float %r.val, float addrspace(1)* %r
- store volatile half %a.neg, half addrspace(1)* undef
+ store volatile float %r.val, ptr addrspace(1) %r
+ store volatile half %a.neg, ptr addrspace(1) undef
ret void
}
@@ -163,15 +163,15 @@ entry:
; GCN: buffer_store_dword [[CVTA_NEG]]
; GCN: buffer_store_short [[MUL]]
define amdgpu_kernel void @fneg_multi_foldable_use_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.neg = fsub half -0.0, %a.val
%r.val = fpext half %a.neg to float
%mul = fmul half %a.neg, %a.val
- store volatile float %r.val, float addrspace(1)* %r
- store volatile half %mul, half addrspace(1)* undef
+ store volatile float %r.val, ptr addrspace(1) %r
+ store volatile half %mul, ptr addrspace(1) undef
ret void
}
@@ -185,14 +185,14 @@ entry:
; GCN: store_dword [[CVT]]
; GCN: store_short [[XOR]]
define amdgpu_kernel void @fabs_multi_use_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%r.val = fpext half %a.fabs to float
- store volatile float %r.val, float addrspace(1)* %r
- store volatile half %a.fabs, half addrspace(1)* undef
+ store volatile float %r.val, ptr addrspace(1) %r
+ store volatile half %a.fabs, ptr addrspace(1) undef
ret void
}
@@ -209,15 +209,15 @@ entry:
; GCN: buffer_store_dword [[ABS_A]]
; GCN: buffer_store_short [[MUL]]
define amdgpu_kernel void @fabs_multi_foldable_use_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%r.val = fpext half %a.fabs to float
%mul = fmul half %a.fabs, %a.val
- store volatile float %r.val, float addrspace(1)* %r
- store volatile half %mul, half addrspace(1)* undef
+ store volatile float %r.val, ptr addrspace(1) %r
+ store volatile half %mul, ptr addrspace(1) undef
ret void
}
@@ -231,15 +231,15 @@ entry:
; GCN: buffer_store_dword [[CVT]]
; GCN: buffer_store_short [[OR]]
define amdgpu_kernel void @fabs_fneg_multi_use_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%a.fneg.fabs = fsub half -0.0, %a.fabs
%r.val = fpext half %a.fneg.fabs to float
- store volatile float %r.val, float addrspace(1)* %r
- store volatile half %a.fneg.fabs, half addrspace(1)* undef
+ store volatile float %r.val, ptr addrspace(1) %r
+ store volatile half %a.fneg.fabs, ptr addrspace(1) undef
ret void
}
@@ -256,16 +256,16 @@ entry:
; GCN: buffer_store_dword [[FABS_FNEG]]
; GCN: buffer_store_short [[MUL]]
define amdgpu_kernel void @fabs_fneg_multi_foldable_use_fpext_f16_to_f32(
- float addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%a.fneg.fabs = fsub half -0.0, %a.fabs
%r.val = fpext half %a.fneg.fabs to float
%mul = fmul half %a.fneg.fabs, %a.val
- store volatile float %r.val, float addrspace(1)* %r
- store volatile half %mul, half addrspace(1)* undef
+ store volatile float %r.val, ptr addrspace(1) %r
+ store volatile half %mul, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.ll b/llvm/test/CodeGen/AMDGPU/fpext.ll
index b11e2ea056c3..cc5291cf2a99 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext.ll
@@ -3,18 +3,18 @@
; FUNC-LABEL: {{^}}fpext_f32_to_f64:
; SI: v_cvt_f64_f32_e32 {{v\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
-define amdgpu_kernel void @fpext_f32_to_f64(double addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fpext_f32_to_f64(ptr addrspace(1) %out, float %in) {
%result = fpext float %in to double
- store double %result, double addrspace(1)* %out
+ store double %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}fpext_v2f32_to_v2f64:
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
-define amdgpu_kernel void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @fpext_v2f32_to_v2f64(ptr addrspace(1) %out, <2 x float> %in) {
%result = fpext <2 x float> %in to <2 x double>
- store <2 x double> %result, <2 x double> addrspace(1)* %out
+ store <2 x double> %result, ptr addrspace(1) %out
ret void
}
@@ -22,9 +22,9 @@ define amdgpu_kernel void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out,
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
-define amdgpu_kernel void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) {
+define amdgpu_kernel void @fpext_v3f32_to_v3f64(ptr addrspace(1) %out, <3 x float> %in) {
%result = fpext <3 x float> %in to <3 x double>
- store <3 x double> %result, <3 x double> addrspace(1)* %out
+ store <3 x double> %result, ptr addrspace(1) %out
ret void
}
@@ -33,9 +33,9 @@ define amdgpu_kernel void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out,
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
-define amdgpu_kernel void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @fpext_v4f32_to_v4f64(ptr addrspace(1) %out, <4 x float> %in) {
%result = fpext <4 x float> %in to <4 x double>
- store <4 x double> %result, <4 x double> addrspace(1)* %out
+ store <4 x double> %result, ptr addrspace(1) %out
ret void
}
@@ -48,8 +48,8 @@ define amdgpu_kernel void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out,
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
-define amdgpu_kernel void @fpext_v8f32_to_v8f64(<8 x double> addrspace(1)* %out, <8 x float> %in) {
+define amdgpu_kernel void @fpext_v8f32_to_v8f64(ptr addrspace(1) %out, <8 x float> %in) {
%result = fpext <8 x float> %in to <8 x double>
- store <8 x double> %result, <8 x double> addrspace(1)* %out
+ store <8 x double> %result, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index 6fc9b7f9009d..03792ca8da0d 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -9,12 +9,12 @@
; GCN: buffer_store_short v[[R_I16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i16(
- i16 addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fptosi half %a.val to i16
- store i16 %r.val, i16 addrspace(1)* %r
+ store i16 %r.val, ptr addrspace(1) %r
ret void
}
@@ -25,12 +25,12 @@ entry:
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i32(
- i32 addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fptosi half %a.val to i32
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
ret void
}
@@ -45,12 +45,12 @@ entry:
; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_f16_to_i64(
- i64 addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fptosi half %a.val to i64
- store i64 %r.val, i64 addrspace(1)* %r
+ store i64 %r.val, ptr addrspace(1) %r
ret void
}
@@ -74,12 +74,12 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i16(
- <2 x i16> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fptosi <2 x half> %a.val to <2 x i16>
- store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r
+ store <2 x i16> %r.val, ptr addrspace(1) %r
ret void
}
@@ -93,12 +93,12 @@ entry:
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i32(
- <2 x i32> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fptosi <2 x half> %a.val to <2 x i32>
- store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r
+ store <2 x i32> %r.val, ptr addrspace(1) %r
ret void
}
@@ -124,12 +124,12 @@ entry:
; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptosi_v2f16_to_v2i64(
- <2 x i64> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fptosi <2 x half> %a.val to <2 x i64>
- store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
+ store <2 x i64> %r.val, ptr addrspace(1) %r
ret void
}
@@ -139,9 +139,9 @@ entry:
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[4:5]
-define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) {
entry:
%conv = fptosi half %in to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
index bf8677b14395..48b76eb313b8 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -9,12 +9,12 @@
; GCN: buffer_store_short v[[R_I16]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_f16_to_i16(
- i16 addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fptoui half %a.val to i16
- store i16 %r.val, i16 addrspace(1)* %r
+ store i16 %r.val, ptr addrspace(1) %r
ret void
}
@@ -25,12 +25,12 @@ entry:
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_f16_to_i32(
- i32 addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fptoui half %a.val to i32
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
ret void
}
@@ -45,12 +45,12 @@ entry:
; GCN: buffer_store_dwordx2 v[[[R_I64_Low]]{{\:}}[[R_I64_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_f16_to_i64(
- i64 addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = fptoui half %a.val to i64
- store i64 %r.val, i64 addrspace(1)* %r
+ store i64 %r.val, ptr addrspace(1) %r
ret void
}
@@ -73,12 +73,12 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_v2f16_to_v2i16(
- <2 x i16> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fptoui <2 x half> %a.val to <2 x i16>
- store <2 x i16> %r.val, <2 x i16> addrspace(1)* %r
+ store <2 x i16> %r.val, ptr addrspace(1) %r
ret void
}
@@ -92,12 +92,12 @@ entry:
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_v2f16_to_v2i32(
- <2 x i32> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fptoui <2 x half> %a.val to <2 x i32>
- store <2 x i32> %r.val, <2 x i32> addrspace(1)* %r
+ store <2 x i32> %r.val, ptr addrspace(1) %r
ret void
}
@@ -120,12 +120,12 @@ entry:
; GCN: buffer_store_dwordx4 v[[[R_I64_0_Low]]{{\:}}[[R_I64_1_High]]]
; GCN: s_endpgm
define amdgpu_kernel void @fptoui_v2f16_to_v2i64(
- <2 x i64> addrspace(1)* %r,
- <2 x half> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ %a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = fptoui <2 x half> %a.val to <2 x i64>
- store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
+ store <2 x i64> %r.val, ptr addrspace(1) %r
ret void
}
@@ -135,9 +135,9 @@ entry:
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[4:5]
-define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+define amdgpu_kernel void @fptoui_f16_to_i1(ptr addrspace(1) %out, half %in) {
entry:
%conv = fptoui half %in to i1
- store i1 %conv, i1 addrspace(1)* %out
+ store i1 %conv, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll b/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll
index d501be5c8bf0..8300436d2f41 100644
--- a/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll
+++ b/llvm/test/CodeGen/AMDGPU/i8-to-double-to-float.ll
@@ -2,10 +2,10 @@
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8, i8 addrspace(1)* %in
+define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %1 = load i8, ptr addrspace(1) %in
%2 = uitofp i8 %1 to double
%3 = fptrunc double %2 to float
- store float %3, float addrspace(1)* %out
+ store float %3, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
index eadce225e350..0c2d669652af 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
@@ -5,7 +5,7 @@
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_sgt_true_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_sgt_true_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -15,7 +15,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -27,7 +27,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_sgt_true_sext_swap(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_sgt_true_sext_swap(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -37,7 +37,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -49,7 +49,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_ne_true_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_ne_true_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -59,7 +59,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -71,7 +71,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_ult_true_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_ult_true_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -81,7 +81,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -93,7 +93,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_eq_true_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_eq_true_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -103,7 +103,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -115,7 +115,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_sle_true_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_sle_true_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -125,7 +125,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -137,7 +137,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_uge_true_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_uge_true_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -147,7 +147,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -159,7 +159,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_eq_false_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_eq_false_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -169,7 +169,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -181,7 +181,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_sge_false_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_sge_false_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -191,7 +191,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -203,7 +203,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_ule_false_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_ule_false_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -213,7 +213,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -225,7 +225,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_ne_false_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_ne_false_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -235,7 +235,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -246,7 +246,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_ugt_false_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_ugt_false_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -256,7 +256,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
@@ -267,7 +267,7 @@ endif:
; GCN-NEXT: s_and_saveexec_b64 {{[^,]+}}, [[CC]]
; GCN-NOT: v_cndmask_
-define amdgpu_kernel void @setcc_slt_false_sext(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @setcc_slt_false_sext(ptr addrspace(1) nocapture %arg) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
%y = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -277,7 +277,7 @@ bb:
br i1 %cond, label %then, label %endif
then:
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
br label %endif
endif:
diff --git a/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll b/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll
index 0b780af17bca..91b418a02462 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-eliminate.ll
@@ -6,10 +6,10 @@
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: SUB_INT {{[* ]*}}[[RES]]
; EG-NOT: BFE
-define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+define amdgpu_kernel void @sext_in_reg_i1_i32_add(ptr addrspace(1) %out, i1 %a, i32 %b) {
%sext = sext i1 %a to i32
%res = add i32 %b, %sext
- store i32 %res, i32 addrspace(1)* %out
+ store i32 %res, ptr addrspace(1) %out
ret void
}
@@ -18,9 +18,9 @@ define amdgpu_kernel void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a,
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT {{[* ]*}}[[RES]]
; EG-NOT: BFE
-define amdgpu_kernel void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+define amdgpu_kernel void @sext_in_reg_i1_i32_sub(ptr addrspace(1) %out, i1 %a, i32 %b) {
%sext = sext i1 %a to i32
%res = sub i32 %b, %sext
- store i32 %res, i32 addrspace(1)* %out
+ store i32 %res, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
index 30487eac73d1..637683315b73 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -15,10 +15,10 @@
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
-define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_i32(ptr addrspace(1) %out, i32 %in) #0 {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31
- store i32 %sext, i32 addrspace(1)* %out
+ store i32 %sext, ptr addrspace(1) %out
ret void
}
@@ -32,11 +32,11 @@ define amdgpu_kernel void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) #
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 24
%ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
+ store i32 %ashr, ptr addrspace(1) %out, align 4
ret void
}
@@ -50,11 +50,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a,
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 16
%ashr = ashr i32 %shl, 16
- store i32 %ashr, i32 addrspace(1)* %out, align 4
+ store i32 %ashr, ptr addrspace(1) %out, align 4
ret void
}
@@ -68,11 +68,11 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(ptr addrspace(1) %out, <1 x i32> %a, <1 x i32> %b) #0 {
%c = add <1 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <1 x i32> %c, <i32 24>
%ashr = ashr <1 x i32> %shl, <i32 24>
- store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
+ store <1 x i32> %ashr, ptr addrspace(1) %out, align 4
ret void
}
@@ -82,11 +82,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out,
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
%ashr = ashr i64 %shl, 63
- store i64 %ashr, i64 addrspace(1)* %out, align 8
+ store i64 %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -96,11 +96,11 @@ define amdgpu_kernel void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a,
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
%ashr = ashr i64 %shl, 56
- store i64 %ashr, i64 addrspace(1)* %out, align 8
+ store i64 %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -111,11 +111,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a,
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i16_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 48
%ashr = ashr i64 %shl, 48
- store i64 %ashr, i64 addrspace(1)* %out, align 8
+ store i64 %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -125,11 +125,11 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
%ashr = ashr i64 %shl, 32
- store i64 %ashr, i64 addrspace(1)* %out, align 8
+ store i64 %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -140,11 +140,11 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a
; XGCN: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
-; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) #0 {
+; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(ptr addrspace(1) %out, <1 x i64> %a, <1 x i64> %b) #0 {
; %c = add <1 x i64> %a, %b
; %shl = shl <1 x i64> %c, <i64 56>
; %ashr = ashr <1 x i64> %shl, <i64 56>
-; store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
+; store <1 x i64> %ashr, ptr addrspace(1) %out, align 8
; ret void
; }
@@ -160,18 +160,18 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %a.gep, align 8
+ %b = load i64, ptr addrspace(1) %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
%ashr = ashr i64 %shl, 63
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ store i64 %ashr, ptr addrspace(1) %out.gep, align 8
ret void
}
@@ -187,18 +187,18 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 a
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %a.gep, align 8
+ %b = load i64, ptr addrspace(1) %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
%ashr = ashr i64 %shl, 56
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ store i64 %ashr, ptr addrspace(1) %out.gep, align 8
ret void
}
@@ -214,18 +214,18 @@ define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 a
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %a.gep, align 8
+ %b = load i64, ptr addrspace(1) %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 48
%ashr = ashr i64 %shl, 48
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ store i64 %ashr, ptr addrspace(1) %out.gep, align 8
ret void
}
@@ -238,18 +238,18 @@ define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64
; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[SHR]]]
-define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %a.gep, align 8
+ %b = load i64, ptr addrspace(1) %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
%ashr = ashr i64 %shl, 32
- store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ store i64 %ashr, ptr addrspace(1) %out.gep, align 8
ret void
}
@@ -264,11 +264,11 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b
%x = shl i32 %c, 6
%y = ashr i32 %x, 7
- store i32 %y, i32 addrspace(1)* %out
+ store i32 %y, ptr addrspace(1) %out
ret void
}
@@ -287,11 +287,11 @@ define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)*
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
%y = ashr <2 x i32> %x, <i32 7, i32 7>
- store <2 x i32> %y, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %y, ptr addrspace(1) %out
ret void
}
@@ -305,11 +305,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addr
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 31, i32 31>
%ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
- store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -326,11 +326,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %ou
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
%ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ store <4 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -343,11 +343,11 @@ define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %ou
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 24, i32 24>
%ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
- store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -364,11 +364,11 @@ define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %ou
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
%ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ store <4 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -381,35 +381,35 @@ define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %ou
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 16, i32 16>
%ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
- store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}testcase:
-define amdgpu_kernel void @testcase(i8 addrspace(1)* %out, i8 %a) #0 {
+define amdgpu_kernel void @testcase(ptr addrspace(1) %out, i8 %a) #0 {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
%cmp_slt = icmp slt i8 %a, 0
%sel0 = select i1 %cmp_slt, i8 0, i8 %a
%sel1 = select i1 %cmp_eq, i8 0, i8 %a
%xor = xor i8 %sel0, %sel1
- store i8 %xor, i8 addrspace(1)* %out
+ store i8 %xor, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}testcase_3:
-define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 {
+define amdgpu_kernel void @testcase_3(ptr addrspace(1) %out, i8 %a) #0 {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
%cmp_slt = icmp slt i8 %a, 0
%sel0 = select i1 %cmp_slt, i8 0, i8 %a
%sel1 = select i1 %cmp_eq, i8 0, i8 %a
%xor = xor i8 %sel0, %sel1
- store i8 %xor, i8 addrspace(1)* %out
+ store i8 %xor, ptr addrspace(1) %out
ret void
}
@@ -418,26 +418,26 @@ define amdgpu_kernel void @testcase_3(i8 addrspace(1)* %out, i8 %a) #0 {
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 {
- %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
+define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
+ %loada = load <4 x i32>, ptr addrspace(1) %a, align 16
+ %loadb = load <4 x i32>, ptr addrspace(1) %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
%ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ store <4 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) #0 {
- %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
+define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
+ %loada = load <4 x i32>, ptr addrspace(1) %a, align 16
+ %loadb = load <4 x i32>, ptr addrspace(1) %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
%ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
- store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ store <4 x i32> %ashr, ptr addrspace(1) %out, align 8
ret void
}
@@ -446,14 +446,14 @@ define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1
; GCN: v_max_i32
; GCN-NOT: bfe
; GCN: buffer_store_short
-define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) #0 {
- %tmp5 = load i8, i8 addrspace(1)* %src, align 1
+define amdgpu_kernel void @sext_in_reg_to_illegal_type(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %src) #0 {
+ %tmp5 = load i8, ptr addrspace(1) %src, align 1
%tmp2 = sext i8 %tmp5 to i32
%tmp2.5 = icmp sgt i32 %tmp2, 0
%tmp3 = select i1 %tmp2.5, i32 %tmp2, i32 0
%tmp4 = trunc i32 %tmp3 to i8
%tmp6 = sext i8 %tmp4 to i16
- store i16 %tmp6, i16 addrspace(1)* %out, align 2
+ store i16 %tmp6, ptr addrspace(1) %out, align 2
ret void
}
@@ -472,20 +472,20 @@ define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocaptu
; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %a.gep, align 8
+ %b = load i64, ptr addrspace(1) %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
%ashr = ashr i64 %shl, 63
%and = and i64 %ashr, %s.val
- store i64 %and, i64 addrspace(1)* %out.gep, align 8
+ store i64 %and, ptr addrspace(1) %out.gep, align 8
ret void
}
@@ -502,19 +502,19 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %o
; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %a.gep, align 8
- %b = load i64, i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %a.gep, align 8
+ %b = load i64, ptr addrspace(1) %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
%ashr = ashr i64 %shl, 32
%and = and i64 %ashr, %s.val
- store i64 %and, i64 addrspace(1)* %out.gep, align 8
+ store i64 %and, ptr addrspace(1) %out.gep, align 8
ret void
}
@@ -528,12 +528,12 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
-define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
- %ld = load i32, i32 addrspace(4)* %ptr
+define amdgpu_kernel void @s_sext_in_reg_i1_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+ %ld = load i32, ptr addrspace(4) %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 15
%sext = ashr i16 %shl, 15
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -547,12 +547,12 @@ define amdgpu_kernel void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addr
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
-define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 {
- %ld = load i32, i32 addrspace(4)* %ptr
+define amdgpu_kernel void @s_sext_in_reg_i2_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+ %ld = load i32, ptr addrspace(4) %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 14
%sext = ashr i16 %shl, 14
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -561,15 +561,15 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
-define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_i16(ptr addrspace(3) %out, ptr addrspace(1) %ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
- %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
+ %gep = getelementptr i16, ptr addrspace(1) %ptr, i32 %tid
+ %out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid
- %in = load i16, i16 addrspace(1)* %gep
+ %in = load i16, ptr addrspace(1) %gep
%shl = shl i16 %in, 15
%sext = ashr i16 %shl, 15
- store i16 %sext, i16 addrspace(3)* %out.gep
+ store i16 %sext, ptr addrspace(3) %out.gep
ret void
}
@@ -582,19 +582,19 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
-define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
+define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(ptr addrspace(3) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i16 %s.val) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
- %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
- %a = load volatile i16, i16 addrspace(1)* %a.gep, align 2
- %b = load volatile i16, i16 addrspace(1)* %b.gep, align 2
+ %a.gep = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
+ %b.gep = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid
+ %out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid
+ %a = load volatile i16, ptr addrspace(1) %a.gep, align 2
+ %b = load volatile i16, ptr addrspace(1) %b.gep, align 2
%c = shl i16 %a, %b
%shl = shl i16 %c, 15
%ashr = ashr i16 %shl, 15
- store i16 %ashr, i16 addrspace(3)* %out.gep, align 2
+ store i16 %ashr, ptr addrspace(3) %out.gep, align 2
ret void
}
@@ -608,10 +608,10 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out,
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
-define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(ptr addrspace(1) %out, i16 %in) #0 {
%shl = shl i16 %in, 14
%sext = ashr i16 %shl, 14
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -625,10 +625,10 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
-define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(ptr addrspace(1) %out, i16 %in) #0 {
%shl = shl i16 %in, 8
%sext = ashr i16 %shl, 8
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -642,10 +642,10 @@ define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
-define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(ptr addrspace(1) %out, i16 %in) #0 {
%shl = shl i16 %in, 1
%sext = ashr i16 %shl, 1
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -653,11 +653,11 @@ define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 15, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 15, [[SHL]]
-define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 15, i16 15>
%ashr = ashr <2 x i16> %shl, <i16 15, i16 15>
- store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %ashr, ptr addrspace(1) %out
ret void
}
@@ -668,11 +668,11 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(<2 x i16> addrspace(1)* %ou
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 15, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}}
-define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 {
%c = add <3 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <3 x i16> %c, <i16 15, i16 15, i16 15>
%ashr = ashr <3 x i16> %shl, <i16 15, i16 15, i16 15>
- store <3 x i16> %ashr, <3 x i16> addrspace(1)* %out
+ store <3 x i16> %ashr, ptr addrspace(1) %out
ret void
}
@@ -680,11 +680,11 @@ define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(<3 x i16> addrspace(1)* %ou
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 14, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 14, [[SHL]]
-define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 14, i16 14>
%ashr = ashr <2 x i16> %shl, <i16 14, i16 14>
- store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %ashr, ptr addrspace(1) %out
ret void
}
@@ -692,11 +692,11 @@ define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(<2 x i16> addrspace(1)* %ou
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 8, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 8, [[SHL]]
-define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 8, i16 8>
%ashr = ashr <2 x i16> %shl, <i16 8, i16 8>
- store <2 x i16> %ashr, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %ashr, ptr addrspace(1) %out
ret void
}
@@ -707,11 +707,11 @@ define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(<2 x i16> addrspace(1)* %ou
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
-define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 {
%c = add <3 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <3 x i16> %c, <i16 8, i16 8, i16 8>
%ashr = ashr <3 x i16> %shl, <i16 8, i16 8, i16 8>
- store <3 x i16> %ashr, <3 x i16> addrspace(1)* %out
+ store <3 x i16> %ashr, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
index 5dd8bee7b1e4..9a03d216c7a9 100644
--- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll
+++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=SI
; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -allow-deprecated-dag-overlap -enable-var-scope --check-prefix=VI
-define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_sext_i1_to_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -32,11 +32,11 @@ define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
+ store i32 %sext, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+define amdgpu_kernel void @test_s_sext_i32_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind {
; SI-LABEL: test_s_sext_i32_to_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -74,11 +74,11 @@ entry:
%mul = mul i32 %a, %b
%add = add i32 %mul, %c
%sext = sext i32 %add to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
+ store i64 %sext, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_sext_i1_to_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -110,11 +110,11 @@ define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
+ store i64 %sext, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
+define amdgpu_kernel void @s_sext_i32_to_i64(ptr addrspace(1) %out, i32 %a) nounwind {
; SI-LABEL: s_sext_i32_to_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -141,11 +141,11 @@ define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nou
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
%sext = sext i32 %a to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
+ store i64 %sext, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v_sext_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: v_sext_i32_to_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -181,13 +181,13 @@ define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspa
; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %in, align 4
+ %val = load i32, ptr addrspace(1) %in, align 4
%sext = sext i32 %val to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
+ store i64 %sext, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
+define amdgpu_kernel void @s_sext_i16_to_i64(ptr addrspace(1) %out, i16 %a) nounwind {
; SI-LABEL: s_sext_i16_to_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -214,11 +214,11 @@ define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nou
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
%sext = sext i16 %a to i64
- store i64 %sext, i64 addrspace(1)* %out, align 8
+ store i64 %sext, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i16(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_sext_i1_to_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -248,7 +248,7 @@ define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i16
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -256,7 +256,7 @@ define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32
; makes it all the way through the legalizer/optimizer to make sure
; we select this correctly. In the s_sext_i1_to_i16, the sign_extend node
; is optimized to a select very early.
-define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+define amdgpu_kernel void @s_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; SI-LABEL: s_sext_i1_to_i16_with_and:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -292,11 +292,11 @@ define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32
%cmp1 = icmp eq i32 %c, %d
%cmp = and i1 %cmp0, %cmp1
%sext = sext i1 %cmp to i16
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+define amdgpu_kernel void @v_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind {
; SI-LABEL: v_sext_i1_to_i16_with_and:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -335,7 +335,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32
%cmp1 = icmp eq i32 %b, %c
%cmp = and i1 %cmp0, %cmp1
%sext = sext i1 %cmp to i16
- store i16 %sext, i16 addrspace(1)* %out
+ store i16 %sext, ptr addrspace(1) %out
ret void
}
@@ -347,7 +347,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32
; t55: i16 = srl t29, Constant:i32<8>
; t63: i32 = any_extend t55
; t64: i32 = sign_extend_inreg t63, ValueType:ch:i8
-define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+define amdgpu_kernel void @s_sext_v4i8_to_v4i32(ptr addrspace(1) %out, i32 %a) nounwind {
; SI-LABEL: s_sext_v4i8_to_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -403,16 +403,16 @@ define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a)
%elt1 = extractelement <4 x i32> %ext, i32 1
%elt2 = extractelement <4 x i32> %ext, i32 2
%elt3 = extractelement <4 x i32> %ext, i32 3
- store volatile i32 %elt0, i32 addrspace(1)* %out
- store volatile i32 %elt1, i32 addrspace(1)* %out
- store volatile i32 %elt2, i32 addrspace(1)* %out
- store volatile i32 %elt3, i32 addrspace(1)* %out
+ store volatile i32 %elt0, ptr addrspace(1) %out
+ store volatile i32 %elt1, ptr addrspace(1) %out
+ store volatile i32 %elt2, ptr addrspace(1) %out
+ store volatile i32 %elt3, ptr addrspace(1) %out
ret void
}
; FIXME: need to optimize same sequence as above test to avoid
; this shift.
-define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v_sext_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: v_sext_v4i8_to_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -469,22 +469,22 @@ define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addr
; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
- %a = load i32, i32 addrspace(1)* %in
+ %a = load i32, ptr addrspace(1) %in
%cast = bitcast i32 %a to <4 x i8>
%ext = sext <4 x i8> %cast to <4 x i32>
%elt0 = extractelement <4 x i32> %ext, i32 0
%elt1 = extractelement <4 x i32> %ext, i32 1
%elt2 = extractelement <4 x i32> %ext, i32 2
%elt3 = extractelement <4 x i32> %ext, i32 3
- store volatile i32 %elt0, i32 addrspace(1)* %out
- store volatile i32 %elt1, i32 addrspace(1)* %out
- store volatile i32 %elt2, i32 addrspace(1)* %out
- store volatile i32 %elt3, i32 addrspace(1)* %out
+ store volatile i32 %elt0, ptr addrspace(1) %out
+ store volatile i32 %elt1, ptr addrspace(1) %out
+ store volatile i32 %elt2, ptr addrspace(1) %out
+ store volatile i32 %elt3, ptr addrspace(1) %out
ret void
}
; FIXME: s_bfe_i64, same on SI and VI
-define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) nounwind {
+define amdgpu_kernel void @s_sext_v4i16_to_v4i32(ptr addrspace(1) %out, i64 %a) nounwind {
; SI-LABEL: s_sext_v4i16_to_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -542,14 +542,14 @@ define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a)
%elt1 = extractelement <4 x i32> %ext, i32 1
%elt2 = extractelement <4 x i32> %ext, i32 2
%elt3 = extractelement <4 x i32> %ext, i32 3
- store volatile i32 %elt0, i32 addrspace(1)* %out
- store volatile i32 %elt1, i32 addrspace(1)* %out
- store volatile i32 %elt2, i32 addrspace(1)* %out
- store volatile i32 %elt3, i32 addrspace(1)* %out
+ store volatile i32 %elt0, ptr addrspace(1) %out
+ store volatile i32 %elt1, ptr addrspace(1) %out
+ store volatile i32 %elt2, ptr addrspace(1) %out
+ store volatile i32 %elt3, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v_sext_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: v_sext_v4i16_to_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -605,17 +605,17 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 add
; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
- %a = load i64, i64 addrspace(1)* %in
+ %a = load i64, ptr addrspace(1) %in
%cast = bitcast i64 %a to <4 x i16>
%ext = sext <4 x i16> %cast to <4 x i32>
%elt0 = extractelement <4 x i32> %ext, i32 0
%elt1 = extractelement <4 x i32> %ext, i32 1
%elt2 = extractelement <4 x i32> %ext, i32 2
%elt3 = extractelement <4 x i32> %ext, i32 3
- store volatile i32 %elt0, i32 addrspace(1)* %out
- store volatile i32 %elt1, i32 addrspace(1)* %out
- store volatile i32 %elt2, i32 addrspace(1)* %out
- store volatile i32 %elt3, i32 addrspace(1)* %out
+ store volatile i32 %elt0, ptr addrspace(1) %out
+ store volatile i32 %elt1, ptr addrspace(1) %out
+ store volatile i32 %elt2, ptr addrspace(1) %out
+ store volatile i32 %elt3, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index f4ff6c13150c..d1f05358ff13 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -4,7 +4,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: sint_to_fp_i32_to_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -27,13 +27,13 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%result = sitofp i32 %in to double
- store double %result, double addrspace(1)* %out
+ store double %result, ptr addrspace(1) %out
ret void
}
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR (implicit vcc).
-define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: sint_to_fp_i1_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -63,11 +63,11 @@ define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in)
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%fp = sitofp i1 %cmp to double
- store double %fp, double addrspace(1)* %out, align 4
+ store double %fp, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) {
; CI-LABEL: sint_to_fp_i1_f64_load:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -96,11 +96,11 @@ define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fp = sitofp i1 %in to double
- store double %fp, double addrspace(1)* %out, align 8
+ store double %fp, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
; CI-LABEL: s_sint_to_fp_i64_to_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -127,11 +127,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i6
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%result = sitofp i64 %in to double
- store double %result, double addrspace(1)* %out
+ store double %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CI-LABEL: v_sint_to_fp_i64_to_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -170,15 +170,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i6
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
- %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %val = load i64, i64 addrspace(1)* %gep, align 8
+ %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %val = load i64, ptr addrspace(1) %gep, align 8
%result = sitofp i64 %val to double
- store double %result, double addrspace(1)* %out
+ store double %result, ptr addrspace(1) %out
ret void
}
; FIXME: bfe and sext on VI+
-define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) {
+define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
; CI-LABEL: s_sint_to_fp_i8_to_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -204,7 +204,7 @@ define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fp = sitofp i8 %in to double
- store double %fp, double addrspace(1)* %out
+ store double %fp, ptr addrspace(1) %out
ret void
}
@@ -227,7 +227,7 @@ define double @v_sint_to_fp_i8_to_f64(i8 %in) {
ret double %fp
}
-define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -257,11 +257,11 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(double addrspace(1)*
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double -1.0, double 0.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
-define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -274,11 +274,11 @@ define void @v_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in)
; GCN-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double -1.0, double 0.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -308,11 +308,11 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %ou
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
- store i64 %select, i64 addrspace(1)* %out, align 8
+ store i64 %select, ptr addrspace(1) %out, align 8
ret void
}
-define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -325,12 +325,12 @@ define void @v_select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
; GCN-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
- store i64 %select, i64 addrspace(1)* %out, align 8
+ store i64 %select, ptr addrspace(1) %out, align 8
ret void
}
; TODO: This should swap the selected order / invert the compare and do it.
-define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,12 +343,12 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32
; GCN-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double -1.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
; TODO: This should swap the selected order / invert the compare and do it.
-define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -378,6 +378,6 @@ define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(double addrspace
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double -1.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index 27bfcce135f3..f7d57da9b2bf 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -4,7 +4,7 @@
; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
-define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_sint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -51,11 +51,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_short v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = sitofp i64 %in to half
- store half %result, half addrspace(1)* %out
+ store half %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_sint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -115,15 +115,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_short v[0:1], v3
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%result = sitofp i64 %val to half
- store half %result, half addrspace(1)* %out.gep
+ store half %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_sint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -168,11 +168,11 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = sitofp i64 %in to float
- store float %result, float addrspace(1)* %out
+ store float %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_sint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -230,15 +230,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%result = sitofp i64 %val to float
- store float %result, float addrspace(1)* %out.gep
+ store float %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{
; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -307,11 +307,11 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
%result = sitofp <2 x i64> %in to <2 x float>
- store <2 x float> %result, <2 x float> addrspace(1)* %out
+ store <2 x float> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -448,15 +448,15 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
- %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
+ %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
+ %value = load <4 x i64>, ptr addrspace(1) %in.gep
%result = sitofp <4 x i64> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
+ store <4 x float> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{
; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -532,11 +532,11 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = sitofp <2 x i64> %in to <2 x half>
- store <2 x half> %result, <2 x half> addrspace(1)* %out
+ store <2 x half> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -687,11 +687,11 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
- %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
+ %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid
+ %value = load <4 x i64>, ptr addrspace(1) %in.gep
%result = sitofp <4 x i64> %value to <4 x half>
- store <4 x half> %result, <4 x half> addrspace(1)* %out.gep
+ store <4 x half> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
index b62b0def34cf..73f81404f0e1 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
@@ -6,9 +6,9 @@
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
%result = sitofp i32 %in to float
- store float %result, float addrspace(1)* %out
+ store float %result, ptr addrspace(1) %out
ret void
}
@@ -16,13 +16,13 @@ define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
; R600: INT_TO_FLT
-define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%result = sitofp i32 %val to float
- store float %result, float addrspace(1)* %out.gep
+ store float %result, ptr addrspace(1) %out.gep
ret void
}
@@ -32,9 +32,9 @@ define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i32(ptr addrspace(1) %out, <2 x i32> %in) #0{
%result = sitofp <2 x i32> %in to <2 x float>
- store <2 x float> %result, <2 x float> addrspace(1)* %out
+ store <2 x float> %result, ptr addrspace(1) %out
ret void
}
@@ -49,10 +49,10 @@ define amdgpu_kernel void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
- %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
+define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %value = load <4 x i32>, ptr addrspace(1) %in
%result = sitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out
+ store <4 x float> %result, ptr addrspace(1) %out
ret void
}
@@ -66,13 +66,13 @@ define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
- %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
+ %value = load <4 x i32>, ptr addrspace(1) %in.gep
%result = sitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
+ store <4 x float> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -82,10 +82,10 @@ define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i1_f32(ptr addrspace(1) %out, i32 %in) #0 {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
- store float %fp, float addrspace(1)* %out
+ store float %fp, ptr addrspace(1) %out
ret void
}
@@ -93,9 +93,9 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, i1 %in) #0 {
%fp = sitofp i1 %in to float
- store float %fp, float addrspace(1)* %out
+ store float %fp, ptr addrspace(1) %out
ret void
}
@@ -106,13 +106,13 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %val = load i1, i1 addrspace(1)* %in.gep
+ %in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+ %val = load i1, ptr addrspace(1) %in.gep
%fp = sitofp i1 %val to float
- store float %fp, float addrspace(1)* %out.gep
+ store float %fp, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
index 144c6933962e..aeeb2fd3451d 100644
--- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll
@@ -12,12 +12,12 @@
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @sitofp_i16_to_f16(
- half addrspace(1)* %r,
- i16 addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load i16, i16 addrspace(1)* %a
+ %a.val = load i16, ptr addrspace(1) %a
%r.val = sitofp i16 %a.val to half
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -28,12 +28,12 @@ entry:
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @sitofp_i32_to_f16(
- half addrspace(1)* %r,
- i32 addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load i32, i32 addrspace(1)* %a
+ %a.val = load i32, ptr addrspace(1) %a
%r.val = sitofp i32 %a.val to half
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -57,12 +57,12 @@ entry:
; GCN: s_endpgm
define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x i16> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
+ %a.val = load <2 x i16>, ptr addrspace(1) %a
%r.val = sitofp <2 x i16> %a.val to <2 x half>
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -85,12 +85,12 @@ entry:
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sitofp_v2i32_to_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x i32> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
+ %a.val = load <2 x i32>, ptr addrspace(1) %a
%r.val = sitofp <2 x i32> %a.val to <2 x half>
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -102,14 +102,14 @@ entry:
; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_short
; GCN: s_endpgm
-define amdgpu_kernel void @s_sint_to_fp_i1_to_f16(half addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float, float addrspace(1) * %in0
- %b = load float, float addrspace(1) * %in1
+define amdgpu_kernel void @s_sint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
+ %a = load float, ptr addrspace(1) %in0
+ %b = load float, ptr addrspace(1) %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 1.000000e+00
%result = xor i1 %acmp, %bcmp
%fp = sitofp i1 %result to half
- store half %fp, half addrspace(1)* %out
+ store half %fp, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index ddb9aef95603..ba52d702c7ed 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -4,7 +4,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_uint_to_fp_i64_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -43,14 +43,14 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i6
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
- %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %val = load i64, i64 addrspace(1)* %gep, align 8
+ %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %val = load i64, ptr addrspace(1) %gep, align 8
%result = uitofp i64 %val to double
- store double %result, double addrspace(1)* %out
+ store double %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
; SI-LABEL: s_uint_to_fp_i64_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -77,11 +77,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i6
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%cast = uitofp i64 %in to double
- store double %cast, double addrspace(1)* %out, align 8
+ store double %cast, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) {
+define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(ptr addrspace(1) %out, <2 x i64> %in) {
; SI-LABEL: s_uint_to_fp_v2i64_to_v2f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -119,11 +119,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
%cast = uitofp <2 x i64> %in to <2 x double>
- store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
+ store <2 x double> %cast, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) {
+define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %in) {
; SI-LABEL: s_uint_to_fp_v4i64_to_v4f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8
@@ -187,11 +187,11 @@ define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)
; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
; VI-NEXT: s_endpgm
%cast = uitofp <4 x i64> %in to <4 x double>
- store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
+ store <4 x double> %cast, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_uint_to_fp_i32_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -214,11 +214,11 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i3
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%cast = uitofp i32 %in to double
- store double %cast, double addrspace(1)* %out, align 8
+ store double %cast, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) {
+define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(ptr addrspace(1) %out, <2 x i32> %in) {
; GCN-LABEL: s_uint_to_fp_v2i32_to_v2f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -230,11 +230,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%cast = uitofp <2 x i32> %in to <2 x double>
- store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
+ store <2 x double> %cast, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) {
+define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4 x i32> %in) {
; SI-LABEL: s_uint_to_fp_v4i32_to_v4f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -275,13 +275,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
%cast = uitofp <4 x i32> %in to <4 x double>
- store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
+ store <4 x double> %cast, ptr addrspace(1) %out, align 16
ret void
}
; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
; uses an SGPR (implicit vcc).
-define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @uint_to_fp_i1_to_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: uint_to_fp_i1_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -311,11 +311,11 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to double
- store double %fp, double addrspace(1)* %out, align 4
+ store double %fp, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) {
+define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(ptr addrspace(1) %out, i1 %in) {
; SI-LABEL: uint_to_fp_i1_to_f64_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -344,11 +344,11 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out,
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fp = uitofp i1 %in to double
- store double %fp, double addrspace(1)* %out, align 8
+ store double %fp, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) {
+define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
; SI-LABEL: s_uint_to_fp_i8_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -373,7 +373,7 @@ define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%fp = uitofp i8 %in to double
- store double %fp, double addrspace(1)* %out
+ store double %fp, ptr addrspace(1) %out
ret void
}
@@ -397,7 +397,7 @@ define double @v_uint_to_fp_i8_to_f64(i8 %in) {
ret double %fp
}
-define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_select_uint_to_fp_i1_vals_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -427,11 +427,11 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(double addrspace(1)*
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 1.0, double 0.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
-define void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -444,11 +444,11 @@ define void @v_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in)
; GCN-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 1.0, double 0.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_select_uint_to_fp_i1_vals_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -478,11 +478,11 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %ou
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
- store i64 %select, i64 addrspace(1)* %out, align 8
+ store i64 %select, ptr addrspace(1) %out, align 8
ret void
}
-define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -495,12 +495,12 @@ define void @v_select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
; GCN-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
- store i64 %select, i64 addrspace(1)* %out, align 8
+ store i64 %select, ptr addrspace(1) %out, align 8
ret void
}
; TODO: This should swap the selected order / invert the compare and do it.
-define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -530,11 +530,11 @@ define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(double addrspace
; VI-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double 1.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
-define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -547,6 +547,6 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32
; GCN-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double 1.0
- store double %select, double addrspace(1)* %out, align 8
+ store double %select, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
index 8f8acf4e7cac..226facf7ba3a 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -4,7 +4,7 @@
; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -43,11 +43,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_short v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = uitofp i64 %in to half
- store half %result, half addrspace(1)* %out
+ store half %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -99,15 +99,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_short v[0:1], v3
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%result = uitofp i64 %val to half
- store half %result, half addrspace(1)* %out.gep
+ store half %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
; GFX6-LABEL: s_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -144,11 +144,11 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = uitofp i64 %in to float
- store float %result, float addrspace(1)* %out
+ store float %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -198,15 +198,15 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%result = uitofp i64 %val to float
- store float %result, float addrspace(1)* %out.gep
+ store float %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -259,11 +259,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
%result = uitofp <2 x i64> %in to <2 x float>
- store <2 x float> %result, <2 x float> addrspace(1)* %out
+ store <2 x float> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -368,15 +368,15 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
- %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
+ %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
+ %value = load <4 x i64>, ptr addrspace(1) %in.gep
%result = uitofp <4 x i64> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
+ store <4 x float> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -436,11 +436,11 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%result = uitofp <2 x i64> %in to <2 x half>
- store <2 x half> %result, <2 x half> addrspace(1)* %out
+ store <2 x half> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -559,11 +559,11 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
- %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
+ %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr <4 x half>, ptr addrspace(1) %out, i32 %tid
+ %value = load <4 x i64>, ptr addrspace(1) %in.gep
%result = uitofp <4 x i64> %value to <4 x half>
- store <4 x half> %result, <4 x half> addrspace(1)* %out.gep
+ store <4 x half> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
index e0a1a7597839..34cf1b67621e 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
@@ -6,9 +6,9 @@
; SI: v_cvt_f32_u32_e32
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
%result = uitofp i32 %in to float
- store float %result, float addrspace(1)* %out
+ store float %result, ptr addrspace(1) %out
ret void
}
@@ -16,13 +16,13 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; SI: v_cvt_f32_u32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
; R600: INT_TO_FLT
-define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%result = uitofp i32 %val to float
- store float %result, float addrspace(1)* %out.gep
+ store float %result, ptr addrspace(1) %out.gep
ret void
}
@@ -32,9 +32,9 @@ define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2 x i32> %in) #0 {
%result = uitofp <2 x i32> %in to <2 x float>
- store <2 x float> %result, <2 x float> addrspace(1)* %out
+ store <2 x float> %result, ptr addrspace(1) %out
ret void
}
@@ -49,10 +49,10 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)*
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
- %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
+define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %value = load <4 x i32>, ptr addrspace(1) %in
%result = uitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out
+ store <4 x float> %result, ptr addrspace(1) %out
ret void
}
@@ -66,13 +66,13 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
- %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
+ %value = load <4 x i32>, ptr addrspace(1) %in.gep
%result = uitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
+ store <4 x float> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -82,10 +82,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
- store float %fp, float addrspace(1)* %out
+ store float %fp, ptr addrspace(1) %out
ret void
}
@@ -93,9 +93,9 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1 %in) #0 {
%fp = uitofp i1 %in to float
- store float %fp, float addrspace(1)* %out
+ store float %fp, ptr addrspace(1) %out
ret void
}
@@ -106,13 +106,13 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out,
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %val = load i1, i1 addrspace(1)* %in.gep
+ %in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+ %val = load i1, ptr addrspace(1) %in.gep
%fp = uitofp i1 %val to float
- store float %fp, float addrspace(1)* %out.gep
+ store float %fp, ptr addrspace(1) %out.gep
ret void
}
@@ -122,10 +122,10 @@ define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
; R600: CNDE_INT
; R600: UINT_TO_FLT
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
entry:
%cvt = uitofp i64 %in to float
- store float %cvt, float addrspace(1)* %out
+ store float %cvt, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
index 99a3141ab548..fd99d80f22c7 100644
--- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll
@@ -11,12 +11,12 @@
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @uitofp_i16_to_f16(
- half addrspace(1)* %r,
- i16 addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load i16, i16 addrspace(1)* %a
+ %a.val = load i16, ptr addrspace(1) %a
%r.val = uitofp i16 %a.val to half
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -27,12 +27,12 @@ entry:
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @uitofp_i32_to_f16(
- half addrspace(1)* %r,
- i32 addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load i32, i32 addrspace(1)* %a
+ %a.val = load i32, ptr addrspace(1) %a
%r.val = uitofp i32 %a.val to half
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -56,12 +56,12 @@ entry:
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @uitofp_v2i16_to_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x i16> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
+ %a.val = load <2 x i16>, ptr addrspace(1) %a
%r.val = uitofp <2 x i16> %a.val to <2 x half>
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -84,12 +84,12 @@ entry:
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @uitofp_v2i32_to_v2f16(
- <2 x half> addrspace(1)* %r,
- <2 x i32> addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
+ %a.val = load <2 x i32>, ptr addrspace(1) %a
%r.val = uitofp <2 x i32> %a.val to <2 x half>
- store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+ store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
@@ -101,14 +101,14 @@ entry:
; GCN-NEXT: v_cvt_f16_f32_e32 [[R_F16:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_short
; GCN: s_endpgm
-define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(half addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float, float addrspace(1) * %in0
- %b = load float, float addrspace(1) * %in1
+define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
+ %a = load float, ptr addrspace(1) %in0
+ %b = load float, ptr addrspace(1) %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 1.000000e+00
%result = xor i1 %acmp, %bcmp
%fp = uitofp i1 %result to half
- store half %fp, half addrspace(1)* %out
+ store half %fp, ptr addrspace(1) %out
ret void
}