[clang] 0419465 - [Clang] Update some CUDA tests to opaque pointers (NFC)
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 13 02:50:17 PST 2022
Author: Nikita Popov
Date: 2022-12-13T11:50:08+01:00
New Revision: 0419465fa4358af1ec808e376e3881377bfac76b
URL: https://github.com/llvm/llvm-project/commit/0419465fa4358af1ec808e376e3881377bfac76b
DIFF: https://github.com/llvm/llvm-project/commit/0419465fa4358af1ec808e376e3881377bfac76b.diff
LOG: [Clang] Update some CUDA tests to opaque pointers (NFC)
Added:
Modified:
clang/test/CodeGenCUDA/address-spaces.cu
clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
clang/test/CodeGenCUDA/atomic-ops.cu
clang/test/CodeGenCUDA/builtin-mangled-name.cu
clang/test/CodeGenCUDA/builtins-amdgcn.cu
clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu
clang/test/CodeGenCUDA/const-var.cu
clang/test/CodeGenCUDA/cuda-builtin-vars.cu
clang/test/CodeGenCUDA/debug-info-address-class.cu
clang/test/CodeGenCUDA/device-var-init.cu
clang/test/CodeGenCUDA/device-var-linkage.cu
clang/test/CodeGenCUDA/host-used-device-var.cu
clang/test/CodeGenCUDA/kernel-args-alignment.cu
clang/test/CodeGenCUDA/kernel-args.cu
clang/test/CodeGenCUDA/kernel-dbg-info.cu
clang/test/CodeGenCUDA/kernel-stub-name.cu
clang/test/CodeGenCUDA/lambda-noinline.cu
clang/test/CodeGenCUDA/lambda-reference-var.cu
clang/test/CodeGenCUDA/lambda.cu
clang/test/CodeGenCUDA/launch-bounds.cu
clang/test/CodeGenCUDA/llvm-used.cu
clang/test/CodeGenCUDA/printf.cu
clang/test/CodeGenCUDA/ptx-kernels.cu
clang/test/CodeGenCUDA/redux-builtins.cu
Removed:
################################################################################
diff --git a/clang/test/CodeGenCUDA/address-spaces.cu b/clang/test/CodeGenCUDA/address-spaces.cu
index dde36a618b349..0608c9cabd048 100644
--- a/clang/test/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CodeGenCUDA/address-spaces.cu
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck %s
// Verifies Clang emits correct address spaces and addrspacecast instructions
// for CUDA code.
@@ -28,17 +28,17 @@ struct MyStruct {
// CHECK: @b ={{.*}} addrspace(3) global float undef
__device__ void foo() {
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
+ // CHECK: load i32, ptr addrspacecast (ptr addrspace(1) @i to ptr)
i++;
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*)
+ // CHECK: load i32, ptr addrspacecast (ptr addrspace(4) @j to ptr)
j++;
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*)
+ // CHECK: load i32, ptr addrspacecast (ptr addrspace(3) @k to ptr)
k++;
__shared__ int lk;
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*)
+ // CHECK: load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ3foovE2lk to ptr)
lk++;
}
@@ -49,7 +49,7 @@ __device__ void func0() {
ap->data2 = 2;
}
// CHECK: define{{.*}} void @_Z5func0v()
-// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %{{.*}}
+// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func0vE1a to ptr), ptr %{{.*}}
__device__ void callee(float *ap) {
*ap = 1.0f;
@@ -60,7 +60,7 @@ __device__ void func1() {
callee(&a); // implicit cast from parameters
}
// CHECK: define{{.*}} void @_Z5func1v()
-// CHECK: call void @_Z6calleePf(float* noundef addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*))
+// CHECK: call void @_Z6calleePf(ptr noundef addrspacecast (ptr addrspace(3) @_ZZ5func1vE1a to ptr))
__device__ void func2() {
__shared__ float a[256];
@@ -68,7 +68,7 @@ __device__ void func2() {
*ap = 1.0f;
}
// CHECK: define{{.*}} void @_Z5func2v()
-// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i{{32|64}} 0, i{{32|64}} 128), float** %{{.*}}
+// CHECK: store ptr getelementptr inbounds ([256 x float], ptr addrspacecast (ptr addrspace(3) @_ZZ5func2vE1a to ptr), i{{32|64}} 0, i{{32|64}} 128), ptr %{{.*}}
__device__ void func3() {
__shared__ float a;
@@ -76,7 +76,7 @@ __device__ void func3() {
*ap = 1.0f;
}
// CHECK: define{{.*}} void @_Z5func3v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %{{.*}}
+// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func3vE1a to ptr), ptr %{{.*}}
__device__ void func4() {
__shared__ float a;
@@ -84,12 +84,12 @@ __device__ void func4() {
*ap = 1.0f;
}
// CHECK: define{{.*}} void @_Z5func4v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %{{.*}}
+// CHECK: store ptr addrspacecast (ptr addrspace(3) @_ZZ5func4vE1a to ptr), ptr %{{.*}}
__shared__ float b;
__device__ float *func5() {
return &b; // implicit cast from a return value
}
-// CHECK: define{{.*}} float* @_Z5func5v()
-// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*)
+// CHECK: define{{.*}} ptr @_Z5func5v()
+// CHECK: ret ptr addrspacecast (ptr addrspace(3) @b to ptr)
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 5d64be1f52619..2e303dcebacc6 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -no-opaque-pointers %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN: -fnative-half-arguments-and-returns | FileCheck %s
@@ -9,33 +9,33 @@
__device__ float ffp1(float *p) {
// CHECK-LABEL: @_Z4ffp1Pf
- // CHECK: atomicrmw fadd float* {{.*}} monotonic
+ // CHECK: atomicrmw fadd ptr {{.*}} monotonic
return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
}
__device__ double ffp2(double *p) {
// CHECK-LABEL: @_Z4ffp2Pd
- // CHECK: atomicrmw fsub double* {{.*}} monotonic
+ // CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
}
// long double is the same as double for amdgcn.
__device__ long double ffp3(long double *p) {
// CHECK-LABEL: @_Z4ffp3Pe
- // CHECK: atomicrmw fsub double* {{.*}} monotonic
+ // CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
}
__device__ double ffp4(double *p, float f) {
// CHECK-LABEL: @_Z4ffp4Pdf
// CHECK: fpext float {{.*}} to double
- // CHECK: atomicrmw fsub double* {{.*}} monotonic
+ // CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, f, memory_order_relaxed);
}
__device__ double ffp5(double *p, int i) {
// CHECK-LABEL: @_Z4ffp5Pdi
// CHECK: sitofp i32 {{.*}} to double
- // CHECK: atomicrmw fsub double* {{.*}} monotonic
+ // CHECK: atomicrmw fsub ptr {{.*}} monotonic
return __atomic_fetch_sub(p, i, memory_order_relaxed);
}
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index 4d788e6807ab2..e506b875b6748 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,31 +1,31 @@
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=PRECOV5 %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s
#include "Inputs/cuda.h"
// PRECOV5-LABEL: test_get_workgroup_size
-// PRECOV5: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
-// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 4
-// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
-// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 6
-// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
-// PRECOV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 8
-// PRECOV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// PRECOV5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
+// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
+// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
+// PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// COV5-LABEL: test_get_workgroup_size
-// COV5: call align 8 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
-// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 12
-// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
-// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 14
-// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
-// COV5: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 16
-// COV5: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// COV5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
+// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
+// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
+// COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
__device__ void test_get_workgroup_size(int d, int *out)
{
switch (d) {
diff --git a/clang/test/CodeGenCUDA/atomic-ops.cu b/clang/test/CodeGenCUDA/atomic-ops.cu
index ef0128f460956..13f4a015386cb 100644
--- a/clang/test/CodeGenCUDA/atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/atomic-ops.cu
@@ -1,18 +1,18 @@
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
#include "Inputs/cuda.h"
// CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
-// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("singlethread-one-as") monotonic, align 4
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 4
__device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -29,8 +29,8 @@ __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj
-// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
__device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -38,17 +38,17 @@ __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned in
}
// CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
-// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: load atomic i32, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
__device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -65,8 +65,8 @@ __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj
-// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
__device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -74,16 +74,16 @@ __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int v
}
// CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
-// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
__device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -99,8 +99,8 @@ __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj
-// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
__device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -108,16 +108,16 @@ __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int v
}
// CHECK-LABEL: @_Z17atomic32_op_agentPiii
-// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4
__device__ int atomic32_op_agent(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -133,8 +133,8 @@ __device__ int atomic32_op_agent(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z18atomicu32_op_agentPjjj
-// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
__device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -142,17 +142,17 @@ __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val,
}
// CHECK-LABEL: @_Z18atomic32_op_systemPiii
-// CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4
-// CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: load i32, i32* %{{.*}}, align 4
-// CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("one-as") monotonic, align 4
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: load i32, ptr %{{.*}}, align 4
+// CHECK: store atomic i32 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 4
__device__ int atomic32_op_system(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -169,8 +169,8 @@ __device__ int atomic32_op_system(int *ptr, int val, int desired) {
}
// CHECK-LABEL: @_Z19atomicu32_op_systemPjjj
-// CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
__device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -178,16 +178,16 @@ __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val,
}
// CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx
-// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
__device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -203,10 +203,10 @@ __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, l
}
// CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy
-// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
-// CHECK: load atomic i64, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
+// CHECK: load atomic i64, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
@@ -216,17 +216,17 @@ __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr,
}
// CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx
-// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
__device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -243,10 +243,10 @@ __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long
}
// CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy
-// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
-// CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
+// CHECK: load atomic i64, ptr {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
@@ -256,16 +256,16 @@ __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, un
}
// CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx
-// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
__device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -281,9 +281,9 @@ __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long
}
// CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy
-// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
@@ -292,16 +292,16 @@ __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, un
}
// CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx
-// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8
__device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -317,9 +317,9 @@ __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long lon
}
// CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy
-// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
@@ -328,17 +328,17 @@ __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsign
}
// CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx
-// CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8
-// CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: load i64, i64* %{{.*}}, align 8
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8
+// CHECK: cmpxchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: cmpxchg weak ptr {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8
+// CHECK: atomicrmw xchg ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw add ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw and ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw or ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw xor ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw min ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw max ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: load i64, ptr %{{.*}}, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8
__device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) {
bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
@@ -355,10 +355,10 @@ __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long lo
}
// CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy
-// CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
-// CHECK: load i64, i64* %{{.*}}, align 8
-// CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8
+// CHECK: atomicrmw umin ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: atomicrmw umax ptr {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
+// CHECK: load i64, ptr %{{.*}}, align 8
+// CHECK: store atomic i64 %{{.*}}, ptr %{{.*}} syncscope("one-as") monotonic, align 8
__device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
diff --git a/clang/test/CodeGenCUDA/builtin-mangled-name.cu b/clang/test/CodeGenCUDA/builtin-mangled-name.cu
index 4cd176baf7399..6a6a2e7f5d65a 100644
--- a/clang/test/CodeGenCUDA/builtin-mangled-name.cu
+++ b/clang/test/CodeGenCUDA/builtin-mangled-name.cu
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux -aux-triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -aux-triple amdgcn-amd-amdhsa \
// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=CHECK,LNX %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-windows-msvc -aux-triple amdgcn-amd-amdhsa \
+// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -aux-triple amdgcn-amd-amdhsa \
// RUN: -emit-llvm -o - -x hip %s | FileCheck -check-prefixes=CHECK,MSVC %s
#include "Inputs/cuda.h"
@@ -15,14 +15,14 @@ namespace X {
// LNX-LABEL: define {{.*}}@_Z4fun1v()
// MSVC-LABEL: define {{.*}} @"?fun1@@YAPEBDXZ"()
-// CHECK: ret i8* getelementptr inbounds ({{.*}} @[[STR1]], i64 0, i64 0)
+// CHECK: ret ptr @[[STR1]]
const char *fun1() {
return __builtin_get_device_side_mangled_name(X::kern1);
}
// LNX-LABEL: define {{.*}}@_Z4fun2v()
// MSVC-LABEL: define {{.*}}@"?fun2@@YAPEBDXZ"()
-// CHECK: ret i8* getelementptr inbounds ({{.*}} @[[STR2]], i64 0, i64 0)
+// CHECK: ret ptr @[[STR2]]
__host__ __device__ const char *fun2() {
return __builtin_get_device_side_mangled_name(X::var1);
}
diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
index 2278c26f0bcfd..c44d198882196 100644
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -1,9 +1,9 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \
// RUN: -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s
@@ -11,23 +11,23 @@
// CHECK-LABEL: @_Z16use_dispatch_ptrPi(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[OUT:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca i32*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32**
-// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32**
-// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[DISPATCH_PTR]] to i32**
-// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32 addrspace(1)* [[OUT_COERCE:%.*]] to i32*
-// CHECK-NEXT: store i32* [[TMP0]], i32** [[OUT_ASCAST]], align 8
-// CHECK-NEXT: [[OUT1:%.*]] = load i32*, i32** [[OUT_ASCAST]], align 8
-// CHECK-NEXT: store i32* [[OUT1]], i32** [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast i8 addrspace(4)* [[TMP1]] to i32*
-// CHECK-NEXT: store i32* [[TMP2]], i32** [[DISPATCH_PTR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DISPATCH_PTR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP5]], align 4
+// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DISPATCH_PTR]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
+// CHECK-NEXT: store ptr [[TMP2]], ptr [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
// CHECK-NEXT: ret void
//
__global__ void use_dispatch_ptr(int* out) {
@@ -40,12 +40,12 @@ __global__
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
- // CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
- // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
- // CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
- // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
- // CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
- // CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4
+ // CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
+ // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+ // CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
+ // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
+ // CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
+ // CHECK-NEXT: store volatile float [[TMP1]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void
@@ -58,12 +58,12 @@ __global__
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
-// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* @_ZZ12test_ds_faddfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
-// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) @_ZZ12test_ds_faddfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: store volatile float [[TMP1]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
__global__ void test_ds_fadd(float src) {
@@ -73,24 +73,24 @@ __global__ void test_ds_fadd(float src) {
// CHECK-LABEL: @_Z12test_ds_fminfPf(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5)
+// CHECK-NEXT: [[SHARED:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5)
+// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float**
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
-// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float**
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
-// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float*
-// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8
-// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8
-// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)*
-// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
-// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4
+// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED]] to ptr
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
+// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED_ADDR]] to ptr
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[SHARED_ASCAST]], align 8
+// CHECK-NEXT: [[SHARED1:%.*]] = load ptr, ptr [[SHARED_ASCAST]], align 8
+// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[SHARED1]], ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin(float src, float *shared) {
@@ -99,11 +99,11 @@ __global__ void test_ds_fmin(float src, float *shared) {
// CHECK-LABEL: @_Z33test_ret_builtin_nondef_addrspacev(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8, addrspace(5)
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i8* addrspace(5)* [[X]] to i8**
-// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(4)* [[TMP0]] to i8*
-// CHECK-NEXT: store i8* [[TMP1]], i8** [[X_ASCAST]], align 8
+// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[X_ASCAST]], align 8
// CHECK-NEXT: ret void
//
__device__ void test_ret_builtin_nondef_addrspace() {
@@ -123,25 +123,25 @@ __global__ void endpgm() {
// CHECK-LABEL: @_Z14test_uicmp_i64Pyyy(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64**
-// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64**
-// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64*
-// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[B_ADDR]] to i64*
-// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64*
-// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8
-// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8
-// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 35)
-// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP4]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8
// CHECK-NEXT: ret void
//
__global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
@@ -153,17 +153,17 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un
// CHECK-LABEL: @_Z14test_s_memtimePy(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64**
-// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64**
-// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64*
-// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8
-// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8
-// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
// CHECK-NEXT: ret void
//
__global__ void test_s_memtime(unsigned long long* out)
@@ -176,26 +176,26 @@ __device__ void func(float *x);
// CHECK-LABEL: @_Z17test_ds_fmin_funcfPf(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5)
+// CHECK-NEXT: [[SHARED:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5)
+// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float**
-// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
-// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float**
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
-// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float*
-// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8
-// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8
-// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)*
-// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
-// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z4funcPf(float* noundef [[TMP5]]) #[[ATTR8:[0-9]+]]
+// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED]] to ptr
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
+// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED_ADDR]] to ptr
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[SHARED_ASCAST]], align 8
+// CHECK-NEXT: [[SHARED1:%.*]] = load ptr, ptr [[SHARED_ASCAST]], align 8
+// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[SHARED1]], ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP5]]) #[[ATTR8:[0-9]+]]
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
@@ -205,21 +205,20 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
// CHECK-LABEL: @_Z14test_is_sharedPf(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[X:%.*]] = alloca float*, align 8, addrspace(5)
-// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float*, align 8, addrspace(5)
+// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[RET:%.*]] = alloca i8, align 1, addrspace(5)
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float**
-// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float**
-// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast i8 addrspace(5)* [[RET]] to i8*
-// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[X_COERCE:%.*]] to float*
-// CHECK-NEXT: store float* [[TMP0]], float** [[X_ASCAST]], align 8
-// CHECK-NEXT: [[X1:%.*]] = load float*, float** [[X_ASCAST]], align 8
-// CHECK-NEXT: store float* [[X1]], float** [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to i8*
-// CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP2]])
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X_COERCE:%.*]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[TMP1]])
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP3]] to i8
-// CHECK-NEXT: store i8 [[FROMBOOL]], i8* [[RET_ASCAST]], align 1
+// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[RET_ASCAST]], align 1
// CHECK-NEXT: ret void
//
__global__ void test_is_shared(float *x){
diff --git a/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu b/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu
index 23be5e5d0f76b..66ec200a8e6d4 100644
--- a/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu
+++ b/clang/test/CodeGenCUDA/builtins-unsafe-atomics-gfx90a.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s
@@ -6,14 +6,14 @@
typedef __attribute__((address_space(3))) float *LP;
// CHECK-LABEL: test_ds_atomic_add_f32
-// CHECK: %[[ADDR_ADDR:.*]] = alloca float*, align 8, addrspace(5)
-// CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast float* addrspace(5)* %[[ADDR_ADDR]] to float**
-// CHECK: store float* %addr, float** %[[ADDR_ADDR_ASCAST_PTR]], align 8
-// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load float*, float** %[[ADDR_ADDR_ASCAST_PTR]], align 8
-// CHECK: %[[AS_CAST:.*]] = addrspacecast float* %[[ADDR_ADDR_ASCAST]] to float addrspace(3)*
-// CHECK: %3 = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* %[[AS_CAST]]
-// CHECK: %4 = load float*, float** %rtn.ascast, align 8
-// CHECK: store float %3, float* %4, align 4
+// CHECK: %[[ADDR_ADDR:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ADDR]] to ptr
+// CHECK: store ptr %addr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8
+// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load ptr, ptr %[[ADDR_ADDR_ASCAST_PTR]], align 8
+// CHECK: %[[AS_CAST:.*]] = addrspacecast ptr %[[ADDR_ADDR_ASCAST]] to ptr addrspace(3)
+// CHECK: %3 = call contract float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %[[AS_CAST]]
+// CHECK: %4 = load ptr, ptr %rtn.ascast, align 8
+// CHECK: store float %3, ptr %4, align 4
__device__ void test_ds_atomic_add_f32(float *addr, float val) {
float *rtn;
*rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0);
diff --git a/clang/test/CodeGenCUDA/const-var.cu b/clang/test/CodeGenCUDA/const-var.cu
index f667bcd3d6dfc..70d4df18dfeef 100644
--- a/clang/test/CodeGenCUDA/const-var.cu
+++ b/clang/test/CodeGenCUDA/const-var.cu
@@ -1,12 +1,12 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -emit-llvm -o - | FileCheck -check-prefix=DEV %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
// RUN: -emit-llvm -o - | FileCheck -check-prefix=HOST %s
// Negative tests.
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -emit-llvm -o - | FileCheck -check-prefix=DEV-NEG %s
#include "Inputs/cuda.h"
@@ -15,12 +15,12 @@
// Both are promoted to device side.
// DEV-DAG: @_ZN5Test1L1aE = internal addrspace(4) constant i32 1
-// DEV-DAG: @_ZN5Test11B2p1E = addrspace(4) externally_initialized constant i32* addrspacecast (i32 addrspace(4)* @_ZN5Test1L1aE to i32*)
-// DEV-DAG: @_ZN5Test11B2p2E = addrspace(4) externally_initialized constant i32* addrspacecast (i32 addrspace(4)* @_ZN5Test1L1aE to i32*)
+// DEV-DAG: @_ZN5Test11B2p1E = addrspace(4) externally_initialized constant ptr addrspacecast (ptr addrspace(4) @_ZN5Test1L1aE to ptr)
+// DEV-DAG: @_ZN5Test11B2p2E = addrspace(4) externally_initialized constant ptr addrspacecast (ptr addrspace(4) @_ZN5Test1L1aE to ptr)
// DEV-DAG: @_ZN5Test12b2E = addrspace(1) externally_initialized global i32 1
// HOST-DAG: @_ZN5Test1L1aE = internal constant i32 1
-// HOST-DAG: @_ZN5Test11B2p1E = constant i32* @_ZN5Test1L1aE
-// HOST-DAG: @_ZN5Test11B2p2E = internal constant i32* undef
+// HOST-DAG: @_ZN5Test11B2p1E = constant ptr @_ZN5Test1L1aE
+// HOST-DAG: @_ZN5Test11B2p2E = internal constant ptr undef
// HOST-DAG: @_ZN5Test12b1E = global i32 1
// HOST-DAG: @_ZN5Test12b2E = internal global i32 undef
namespace Test1 {
@@ -42,7 +42,7 @@ __device__ int b2 = B::p1 == B::p2;
// DEV-NEG-NOT: @_ZN5Test2L1aE
// DEV-NEG-NOT: @_ZN5Test21B1pE
// HOST-DAG: @_ZN5Test21aE = global i32 1
-// HOST-DAG: @_ZN5Test21B1pE = constant i32* @_ZN5Test21aE
+// HOST-DAG: @_ZN5Test21B1pE = constant ptr @_ZN5Test21aE
namespace Test2 {
int a = 1;
diff --git a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
index aae456614ef4e..e76e7a2f82529 100644
--- a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
+++ b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -no-opaque-pointers "-triple" "nvptx-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
#include "__clang_cuda_builtin_vars.h"
-// CHECK: define{{.*}} void @_Z6kernelPi(i32* noundef %out)
+// CHECK: define{{.*}} void @_Z6kernelPi(ptr noundef %out)
__attribute__((global))
void kernel(int *out) {
int i = 0;
diff --git a/clang/test/CodeGenCUDA/debug-info-address-class.cu b/clang/test/CodeGenCUDA/debug-info-address-class.cu
index d9499dc91af27..5e0f775cbe9e3 100644
--- a/clang/test/CodeGenCUDA/debug-info-address-class.cu
+++ b/clang/test/CodeGenCUDA/debug-info-address-class.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown -debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown -debug-info-kind=limited -dwarf-version=2 -debugger-tuning=gdb | FileCheck %s
#include "Inputs/cuda.h"
@@ -14,12 +14,12 @@ __device__ __constant__ int FileVar2;
__device__ void kernel1(
// CHECK-DAG: ![[ARG:[0-9]+]] = !DILocalVariable(name: "Arg", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* {{.*}}, metadata ![[ARG]], metadata !DIExpression()), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[ARG]], metadata !DIExpression()), !dbg !{{[0-9]+}}
int Arg) {
// CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR0]], expr: !DIExpression(DW_OP_constu, 8, DW_OP_swap, DW_OP_xderef))
__shared__ int FuncVar0;
// CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression()), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression()), !dbg !{{[0-9]+}}
int FuncVar1;
}
diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu
index 17dbb00b2ce0d..226b7e295f4b4 100644
--- a/clang/test/CodeGenCUDA/device-var-init.cu
+++ b/clang/test/CodeGenCUDA/device-var-init.cu
@@ -4,12 +4,12 @@
// Make sure we don't allow dynamic initialization for device
// variables, but accept empty constructors allowed by CUDA.
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,NVPTX %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx64-nvidia-cuda -std=c++11 \
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 \
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=HOST %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,AMDGCN %s
#ifdef __clang__
@@ -192,69 +192,69 @@ __device__ void df() {
// NVPTX: %[[t_b_ned:.*]] = alloca %struct.T_B_NED
// NVPTX: %[[t_f_ned:.*]] = alloca %struct.T_F_NED
// NVPTX: %[[t_fa_ned:.*]] = alloca %struct.T_FA_NED
- // AMDGCN: %[[ec:.*]] ={{.*}} addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC*
- // AMDGCN: %[[ed:.*]] ={{.*}} addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED*
- // AMDGCN: %[[ecd:.*]] ={{.*}} addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD*
- // AMDGCN: %[[etc:.*]] ={{.*}} addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC*
- // AMDGCN: %[[uc:.*]] ={{.*}} addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC*
- // AMDGCN: %[[ud:.*]] ={{.*}} addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD*
- // AMDGCN: %[[eci:.*]] ={{.*}} addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI*
- // AMDGCN: %[[nec:.*]] ={{.*}} addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC*
- // AMDGCN: %[[ned:.*]] ={{.*}} addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED*
- // AMDGCN: %[[ncv:.*]] ={{.*}} addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV*
- // AMDGCN: %[[vd:.*]] ={{.*}} addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD*
- // AMDGCN: %[[ncf:.*]] ={{.*}} addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF*
- // AMDGCN: %[[ncfs:.*]] ={{.*}} addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS*
- // AMDGCN: %[[utc:.*]] ={{.*}} addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC*
- // AMDGCN: %[[netc:.*]] ={{.*}} addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC*
- // AMDGCN: %[[ec_i_ec:.*]] ={{.*}} addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC*
- // AMDGCN: %[[ec_i_ec1:.*]] ={{.*}} addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1*
- // AMDGCN: %[[t_v_t:.*]] ={{.*}} addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T*
- // AMDGCN: %[[t_b_nec:.*]] ={{.*}} addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC*
- // AMDGCN: %[[t_f_nec:.*]] ={{.*}} addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC*
- // AMDGCN: %[[t_fa_nec:.*]] ={{.*}} addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC*
- // AMDGCN: %[[t_b_ned:.*]] ={{.*}} addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED*
- // AMDGCN: %[[t_f_ned:.*]] ={{.*}} addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED*
- // AMDGCN: %[[t_fa_ned:.*]] ={{.*}} addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED*
+ // AMDGCN: %[[ec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ec to ptr
+ // AMDGCN: %[[ed:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ed to ptr
+ // AMDGCN: %[[ecd:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ecd to ptr
+ // AMDGCN: %[[etc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %etc to ptr
+ // AMDGCN: %[[uc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %uc to ptr
+ // AMDGCN: %[[ud:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ud to ptr
+ // AMDGCN: %[[eci:.*]] ={{.*}} addrspacecast ptr addrspace(5) %eci to ptr
+ // AMDGCN: %[[nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %nec to ptr
+ // AMDGCN: %[[ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ned to ptr
+ // AMDGCN: %[[ncv:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ncv to ptr
+ // AMDGCN: %[[vd:.*]] ={{.*}} addrspacecast ptr addrspace(5) %vd to ptr
+ // AMDGCN: %[[ncf:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ncf to ptr
+ // AMDGCN: %[[ncfs:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ncfs to ptr
+ // AMDGCN: %[[utc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %utc to ptr
+ // AMDGCN: %[[netc:.*]] ={{.*}} addrspacecast ptr addrspace(5) %netc to ptr
+ // AMDGCN: %[[ec_i_ec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ec_i_ec to ptr
+ // AMDGCN: %[[ec_i_ec1:.*]] ={{.*}} addrspacecast ptr addrspace(5) %ec_i_ec1 to ptr
+ // AMDGCN: %[[t_v_t:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_v_t to ptr
+ // AMDGCN: %[[t_b_nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_b_nec to ptr
+ // AMDGCN: %[[t_f_nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_f_nec to ptr
+ // AMDGCN: %[[t_fa_nec:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_fa_nec to ptr
+ // AMDGCN: %[[t_b_ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_b_ned to ptr
+ // AMDGCN: %[[t_f_ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_f_ned to ptr
+ // AMDGCN: %[[t_fa_ned:.*]] ={{.*}} addrspacecast ptr addrspace(5) %t_fa_ned to ptr
T t;
// DEVICE-NOT: call
EC ec;
- // DEVICE: call void @_ZN2ECC1Ev(%struct.EC* {{[^,]*}} %[[ec]])
+ // DEVICE: call void @_ZN2ECC1Ev(ptr {{[^,]*}} %[[ec]])
ED ed;
// DEVICE-NOT: call
ECD ecd;
- // DEVICE: call void @_ZN3ECDC1Ev(%struct.ECD* {{[^,]*}} %[[ecd]])
+ // DEVICE: call void @_ZN3ECDC1Ev(ptr {{[^,]*}} %[[ecd]])
ETC etc;
- // DEVICE: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* {{[^,]*}} %[[etc]])
+ // DEVICE: call void @_ZN3ETCC1IJEEEDpT_(ptr {{[^,]*}} %[[etc]])
UC uc;
// undefined constructor -- not allowed
- // DEVICE: call void @_ZN2UCC1Ev(%struct.UC* {{[^,]*}} %[[uc]])
+ // DEVICE: call void @_ZN2UCC1Ev(ptr {{[^,]*}} %[[uc]])
UD ud;
// undefined destructor -- not allowed
// DEVICE-NOT: call
ECI eci;
// empty constructor w/ initializer list -- not allowed
- // DEVICE: call void @_ZN3ECIC1Ev(%struct.ECI* {{[^,]*}} %[[eci]])
+ // DEVICE: call void @_ZN3ECIC1Ev(ptr {{[^,]*}} %[[eci]])
NEC nec;
// non-empty constructor -- not allowed
- // DEVICE: call void @_ZN3NECC1Ev(%struct.NEC* {{[^,]*}} %[[nec]])
+ // DEVICE: call void @_ZN3NECC1Ev(ptr {{[^,]*}} %[[nec]])
// non-empty destructor -- not allowed
NED ned;
// no-constructor, virtual method -- not allowed
- // DEVICE: call void @_ZN3NCVC1Ev(%struct.NCV* {{[^,]*}} %[[ncv]])
+ // DEVICE: call void @_ZN3NCVC1Ev(ptr {{[^,]*}} %[[ncv]])
NCV ncv;
// DEVICE-NOT: call
VD vd;
- // DEVICE: call void @_ZN2VDC1Ev(%struct.VD* {{[^,]*}} %[[vd]])
+ // DEVICE: call void @_ZN2VDC1Ev(ptr {{[^,]*}} %[[vd]])
NCF ncf;
- // DEVICE: call void @_ZN3NCFC1Ev(%struct.NCF* {{[^,]*}} %[[ncf]])
+ // DEVICE: call void @_ZN3NCFC1Ev(ptr {{[^,]*}} %[[ncf]])
NCFS ncfs;
- // DEVICE: call void @_ZN4NCFSC1Ev(%struct.NCFS* {{[^,]*}} %[[ncfs]])
+ // DEVICE: call void @_ZN4NCFSC1Ev(ptr {{[^,]*}} %[[ncfs]])
UTC utc;
- // DEVICE: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* {{[^,]*}} %[[utc]])
+ // DEVICE: call void @_ZN3UTCC1IJEEEDpT_(ptr {{[^,]*}} %[[utc]])
NETC netc;
- // DEVICE: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* {{[^,]*}} %[[netc]])
+ // DEVICE: call void @_ZN4NETCC1IJEEEDpT_(ptr {{[^,]*}} %[[netc]])
T_B_T t_b_t;
// DEVICE-NOT: call
T_F_T t_f_t;
@@ -262,17 +262,17 @@ __device__ void df() {
T_FA_T t_fa_t;
// DEVICE-NOT: call
EC_I_EC ec_i_ec;
- // DEVICE: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* {{[^,]*}} %[[ec_i_ec]])
+ // DEVICE: call void @_ZN7EC_I_ECC1Ev(ptr {{[^,]*}} %[[ec_i_ec]])
EC_I_EC1 ec_i_ec1;
- // DEVICE: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* {{[^,]*}} %[[ec_i_ec1]])
+ // DEVICE: call void @_ZN8EC_I_EC1C1Ev(ptr {{[^,]*}} %[[ec_i_ec1]])
T_V_T t_v_t;
- // DEVICE: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* {{[^,]*}} %[[t_v_t]])
+ // DEVICE: call void @_ZN5T_V_TC1Ev(ptr {{[^,]*}} %[[t_v_t]])
T_B_NEC t_b_nec;
- // DEVICE: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* {{[^,]*}} %[[t_b_nec]])
+ // DEVICE: call void @_ZN7T_B_NECC1Ev(ptr {{[^,]*}} %[[t_b_nec]])
T_F_NEC t_f_nec;
- // DEVICE: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* {{[^,]*}} %[[t_f_nec]])
+ // DEVICE: call void @_ZN7T_F_NECC1Ev(ptr {{[^,]*}} %[[t_f_nec]])
T_FA_NEC t_fa_nec;
- // DEVICE: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* {{[^,]*}} %[[t_fa_nec]])
+ // DEVICE: call void @_ZN8T_FA_NECC1Ev(ptr {{[^,]*}} %[[t_fa_nec]])
T_B_NED t_b_ned;
// DEVICE-NOT: call
T_F_NED t_f_ned;
@@ -280,9 +280,9 @@ __device__ void df() {
T_FA_NED t_fa_ned;
// DEVICE-NOT: call
static __shared__ EC s_ec;
- // DEVICE-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*))
+ // DEVICE-NOT: call void @_ZN2ECC1Ev(ptr addrspacecast (ptr addrspace(3) @_ZZ2dfvE4s_ec to ptr))
static __shared__ ETC s_etc;
- // DEVICE-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*))
+ // DEVICE-NOT: call void @_ZN3ETCC1IJEEEDpT_(ptr addrspacecast (ptr addrspace(3) @_ZZ2dfvE5s_etc to ptr))
static const int const_array[] = {1, 2, 3, 4, 5};
static const int const_int = 123;
@@ -291,14 +291,14 @@ __device__ void df() {
df(); // DEVICE: call void @_Z2dfv()
// Verify that we only call non-empty destructors
- // DEVICE-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* {{[^,]*}} %[[t_fa_ned]])
- // DEVICE-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* {{[^,]*}} %[[t_f_ned]])
- // DEVICE-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* {{[^,]*}} %[[t_b_ned]])
- // DEVICE-NEXT: call void @_ZN2VDD1Ev(%struct.VD* {{[^,]*}} %[[vd]])
- // DEVICE-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* {{[^,]*}} %[[ned]])
- // DEVICE-NEXT: call void @_ZN2UDD1Ev(%struct.UD* {{[^,]*}} %[[ud]])
- // DEVICE-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* {{[^,]*}} %[[ecd]])
- // DEVICE-NEXT: call void @_ZN2EDD1Ev(%struct.ED* {{[^,]*}} %[[ed]])
+ // DEVICE-NEXT: call void @_ZN8T_FA_NEDD1Ev(ptr {{[^,]*}} %[[t_fa_ned]])
+ // DEVICE-NEXT: call void @_ZN7T_F_NEDD1Ev(ptr {{[^,]*}} %[[t_f_ned]])
+ // DEVICE-NEXT: call void @_ZN7T_B_NEDD1Ev(ptr {{[^,]*}} %[[t_b_ned]])
+ // DEVICE-NEXT: call void @_ZN2VDD1Ev(ptr {{[^,]*}} %[[vd]])
+ // DEVICE-NEXT: call void @_ZN3NEDD1Ev(ptr {{[^,]*}} %[[ned]])
+ // DEVICE-NEXT: call void @_ZN2UDD1Ev(ptr {{[^,]*}} %[[ud]])
+ // DEVICE-NEXT: call void @_ZN3ECDD1Ev(ptr {{[^,]*}} %[[ecd]])
+ // DEVICE-NEXT: call void @_ZN2EDD1Ev(ptr {{[^,]*}} %[[ed]])
// DEVICE-NEXT: ret void
}
diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu
index a214feb0541f9..3c2efb57525c9 100644
--- a/clang/test/CodeGenCUDA/device-var-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -1,16 +1,16 @@
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,NORDC %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn -fcuda-is-device \
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=DEV,RDC %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-gnu-linux \
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=HOST,RDC-H %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device \
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \
// RUN: | FileCheck -check-prefixes=CUDA %s
@@ -24,9 +24,9 @@ __device__ int v1;
// NORDC-H-DAG: @v2 = internal global i32 undef
// RDC-H-DAG: @v2 = global i32 undef
__constant__ int v2;
-// DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// NORDC-H-DAG: @v3 = internal externally_initialized global i32* null
-// RDC-H-DAG: @v3 = externally_initialized global i32* null
+// DEV-DAG: @v3 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// NORDC-H-DAG: @v3 = internal externally_initialized global ptr null
+// RDC-H-DAG: @v3 = externally_initialized global ptr null
#if __HIP__
__managed__ int v3;
#endif
@@ -37,8 +37,8 @@ extern __device__ int ev1;
// DEV-DAG: @ev2 = external addrspace(4) global i32
// HOST-DAG: @ev2 = external global i32
extern __constant__ int ev2;
-// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)*
-// HOST-DAG: @ev3 = external externally_initialized global i32*
+// DEV-DAG: @ev3 = external addrspace(1) externally_initialized global ptr addrspace(1)
+// HOST-DAG: @ev3 = external externally_initialized global ptr
#if __HIP__
extern __managed__ int ev3;
#endif
@@ -53,9 +53,9 @@ static __device__ int sv1;
// HOST-DAG: @_ZL3sv2 = internal global i32 undef
// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
static __constant__ int sv2;
-// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
+// NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null
+// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null
+// HOST-DAG: @_ZL3sv3 = internal externally_initialized global ptr null
#if __HIP__
static __managed__ int sv3;
#endif
diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
index 2ffaa54db9b33..2c0d06d07c6f2 100644
--- a/clang/test/CodeGenCUDA/host-used-device-var.cu
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -1,16 +1,16 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
// RUN: | FileCheck -check-prefix=DEV %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST %s
// Negative tests.
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
// RUN: | FileCheck -check-prefix=DEV-NEG %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -x hip %s \
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST-NEG %s
#include "Inputs/cuda.h"
@@ -62,7 +62,7 @@ __device__ T add_func (T x, T y)
return x + y;
}
-// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global i32 (i32, i32)* @_Z8add_funcIiET_S0_S0_
+// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global ptr @_Z8add_funcIiET_S0_S0_
template <typename T>
__device__ func_t<T> p_add_func = add_func<T>;
diff --git a/clang/test/CodeGenCUDA/kernel-args-alignment.cu b/clang/test/CodeGenCUDA/kernel-args-alignment.cu
index 1c008dcd8a5db..27b1315681521 100644
--- a/clang/test/CodeGenCUDA/kernel-args-alignment.cu
+++ b/clang/test/CodeGenCUDA/kernel-args-alignment.cu
@@ -1,11 +1,11 @@
// New CUDA kernel launch sequence does not require explicit specification of
// size/offset for each argument, so only the old way is tested.
//
-// RUN: %clang_cc1 -no-opaque-pointers --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm \
// RUN: -target-sdk-version=8.0 -o - %s \
// RUN: | FileCheck -check-prefixes=HOST-OLD,CHECK %s
-// RUN: %clang_cc1 -no-opaque-pointers --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \
+// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \
// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK %s
#include "Inputs/cuda.h"
@@ -22,7 +22,7 @@ struct S {
// Clang should generate a packed LLVM struct for S (denoted by the <>s),
// otherwise this test isn't interesting.
-// CHECK: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }>
+// CHECK: %struct.S = type <{ ptr, i8, %struct.U, [5 x i8] }>
static_assert(alignof(S) == 8, "Unexpected alignment.");
@@ -36,5 +36,5 @@ static_assert(alignof(S) == 8, "Unexpected alignment.");
// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
// DEVICE-LABEL: @_Z6kernelc1SPi
-// DEVICE-SAME: i8{{[^,]*}}, %struct.S* noundef byval(%struct.S) align 8{{[^,]*}}, i32*
+// DEVICE-SAME: i8{{[^,]*}}, ptr noundef byval(%struct.S) align 8{{[^,]*}}, ptr
__global__ void kernel(char a, S s, int *b) {}
diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu
index 46a2b69a4c3d6..5f064694223b5 100644
--- a/clang/test/CodeGenCUDA/kernel-args.cu
+++ b/clang/test/CodeGenCUDA/kernel-args.cu
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
-// RUN: %clang_cc1 -no-opaque-pointers -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
+// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
#include "Inputs/cuda.h"
@@ -9,15 +9,15 @@ struct A {
float *p;
};
-// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}})
-// NVPTX: define{{.*}} void @_Z6kernel1A(%struct.A* noundef byval(%struct.A) align 8 %x)
+// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}})
+// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x)
__global__ void kernel(A x) {
}
class Kernel {
public:
- // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}})
- // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(%struct.A* noundef byval(%struct.A) align 8 %x)
+ // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}})
+ // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x)
static __global__ void memberKernel(A x){}
template<typename T> static __global__ void templateMemberKernel(T x) {}
};
@@ -30,11 +30,11 @@ void launch(void*);
void test() {
Kernel K;
- // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}
- // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(%struct.A* noundef byval(%struct.A) align 8 %x)
+ // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}}
+ // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)templateKernel<A>);
- // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}
- // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* noundef byval(%struct.A) align 8 %x)
+ // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) byref(%struct.A) align 8 %{{.+}}
+ // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)Kernel::templateMemberKernel<A>);
}
diff --git a/clang/test/CodeGenCUDA/kernel-dbg-info.cu b/clang/test/CodeGenCUDA/kernel-dbg-info.cu
index 316e5ed15d8c1..524694299d5de 100644
--- a/clang/test/CodeGenCUDA/kernel-dbg-info.cu
+++ b/clang/test/CodeGenCUDA/kernel-dbg-info.cu
@@ -1,25 +1,25 @@
// RUN: echo "GPU binary would be here" > %t
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -O0 \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \
// RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \
// RUN: -o - -x hip | FileCheck -check-prefixes=CHECK,O0 %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
// RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \
// RUN: -o - -x hip -fcuda-is-device | FileCheck -check-prefix=DEV %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -O0 \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O0 \
// RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \
// RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
// RUN: | FileCheck -check-prefixes=CHECK,O0 %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O0 \
// RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \
// RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
// RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s -O3 \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -O3 \
// RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \
// RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -O3 \
// RUN: -fcuda-include-gpubinary %t -debug-info-kind=limited \
// RUN: -o - -x hip -debugger-tuning=gdb -dwarf-version=4 \
// RUN: -fcuda-is-device | FileCheck -check-prefix=DEV %s
@@ -31,7 +31,7 @@ extern "C" __global__ void ckernel(int *a) {
}
// Kernel symbol for launching kernel.
-// CHECK: @[[SYM:ckernel]] = constant void (i32*)* @__device_stub__ckernel, align 8
+// CHECK: @[[SYM:ckernel]] = constant ptr @__device_stub__ckernel, align 8
// Device side kernel names
// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CodeGenCUDA/kernel-stub-name.cu
index aef99d3c2d736..9884046fcd0fd 100644
--- a/clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -1,19 +1,19 @@
// RUN: echo "GPU binary would be here" > %t
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
// RUN: | FileCheck -check-prefixes=CHECK,GNU %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
// RUN: | FileCheck -check-prefix=NEG %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-pc-windows-msvc -emit-llvm %s \
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
// RUN: %t -o - -x hip\
// RUN: | FileCheck -check-prefixes=CHECK,MSVC %s
-// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-pc-windows-msvc -emit-llvm %s \
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \
// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \
// RUN: %t -o - -x hip\
// RUN: | FileCheck -check-prefix=NEG %s
@@ -22,15 +22,15 @@
// Check kernel handles are emitted for non-MSVC target but not for MSVC target.
-// GNU: @[[HCKERN:ckernel]] = constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8
-// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
-// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
-// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8
+// GNU: @[[HCKERN:ckernel]] = constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
+// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
+// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
+// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
-// MSVC: @[[HCKERN:ckernel]] = dso_local constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8
-// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant void ()* @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8
-// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant void ()* @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8
-// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant void ()*, align 8
+// MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
+// MSVC: @[[HNSKERN:"\?nskernel@ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel@ns@@YAXXZ"]], align 8
+// MSVC: @[[HTKERN:"\?\?\$kernelfunc@H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc@H@@YAXXZ.*"]], comdat, align 8
+// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8
extern "C" __global__ void ckernel() {}
@@ -104,10 +104,10 @@ extern "C" void fun2() {
// Check kernel handle is used for assigning a kernel to a function pointer.
// CHECK-LABEL: define{{.*}}@fun3()
-// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
-// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8
-// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
-// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8
+// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
+// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr, align 8
+// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8
+// CHECK: store ptr @[[HCKERN]], ptr @void_ptr, align 8
extern "C" void fun3() {
kernel_ptr = ckernel;
kernel_ptr = &ckernel;
@@ -119,11 +119,10 @@ extern "C" void fun3() {
// used with triple chevron.
// CHECK-LABEL: define{{.*}}@fun4()
-// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
+// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr
// CHECK: call noundef i32 @{{.*hipConfigureCall}}
-// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
-// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()**
-// CHECK: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8
+// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
+// CHECK: %[[STUB:.*]] = load ptr, ptr %[[HANDLE]], align 8
// CHECK: call void %[[STUB]]()
extern "C" void fun4() {
kernel_ptr = ckernel;
@@ -133,10 +132,9 @@ extern "C" void fun4() {
// Check kernel handle is passed to a function.
// CHECK-LABEL: define{{.*}}@fun5()
-// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr
-// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8
-// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8*
-// CHECK: call void @launch(i8* noundef %[[CAST]])
+// CHECK: store ptr @[[HCKERN]], ptr @kernel_ptr
+// CHECK: %[[HANDLE:.*]] = load ptr, ptr @kernel_ptr, align 8
+// CHECK: call void @launch(ptr noundef %[[HANDLE]])
extern "C" void fun5() {
kernel_ptr = ckernel;
launch((void *)kernel_ptr);
diff --git a/clang/test/CodeGenCUDA/lambda-noinline.cu b/clang/test/CodeGenCUDA/lambda-noinline.cu
index de2196e63f074..be4c5fe7357d2 100644
--- a/clang/test/CodeGenCUDA/lambda-noinline.cu
+++ b/clang/test/CodeGenCUDA/lambda-noinline.cu
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN: -triple x86_64-linux-gnu \
// RUN: | FileCheck -check-prefix=HOST %s
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: | FileCheck -check-prefix=DEV %s
diff --git a/clang/test/CodeGenCUDA/lambda-reference-var.cu b/clang/test/CodeGenCUDA/lambda-reference-var.cu
index 34023bb760367..ab47a2453bc0b 100644
--- a/clang/test/CodeGenCUDA/lambda-reference-var.cu
+++ b/clang/test/CodeGenCUDA/lambda-reference-var.cu
@@ -1,18 +1,18 @@
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN: -triple x86_64-linux-gnu \
// RUN: | FileCheck -check-prefix=HOST %s
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: | FileCheck -check-prefix=DEV %s
#include "Inputs/cuda.h"
-// HOST: %[[T1:.*]] = type <{ i32*, i32, [4 x i8] }>
-// HOST: %[[T2:.*]] = type { i32*, i32** }
-// HOST: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }>
-// DEV: %[[T1:.*]] = type { i32* }
-// DEV: %[[T2:.*]] = type { i32** }
-// DEV: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }>
+// HOST: %[[T1:.*]] = type <{ ptr, i32, [4 x i8] }>
+// HOST: %[[T2:.*]] = type { ptr, ptr }
+// HOST: %[[T3:.*]] = type <{ ptr, i32, [4 x i8] }>
+// DEV: %[[T1:.*]] = type { ptr }
+// DEV: %[[T2:.*]] = type { ptr }
+// DEV: %[[T3:.*]] = type <{ ptr, i32, [4 x i8] }>
int global_host_var;
__device__ int global_device_var;
@@ -20,7 +20,7 @@ template<class F>
__global__ void kern(F f) { f(); }
// DEV-LABEL: @_ZZ27dev_capture_dev_ref_by_copyPiENKUlvE_clEv(
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: store i32 %[[VAL]]
__device__ void dev_capture_dev_ref_by_copy(int *out) {
int &ref = global_device_var;
@@ -37,10 +37,10 @@ __device__ void dev_capture_dev_rval_by_copy(int *out) {
}
// DEV-LABEL: @_ZZ26dev_capture_dev_ref_by_refPiENKUlvE_clEv(
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
-// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL2]], ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: store i32 %[[VAL]]
__device__ void dev_capture_dev_ref_by_ref(int *out) {
int &ref = global_device_var;
@@ -48,10 +48,10 @@ __device__ void dev_capture_dev_ref_by_ref(int *out) {
}
// DEV-LABEL: define{{.*}} void @_Z7dev_refPi(
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
-// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL2]], ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: store i32 %[[VAL]]
__device__ void dev_ref(int *out) {
int &ref = global_device_var;
@@ -60,10 +60,10 @@ __device__ void dev_ref(int *out) {
}
// DEV-LABEL: @_ZZ14dev_lambda_refPiENKUlvE_clEv(
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
-// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
-// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL2]], ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
+// DEV: %[[VAL:.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_device_var to ptr)
// DEV: store i32 %[[VAL]]
__device__ void dev_lambda_ref(int *out) {
[=](){
@@ -74,7 +74,7 @@ __device__ void dev_lambda_ref(int *out) {
}
// HOST-LABEL: @_ZZ29host_capture_host_ref_by_copyPiENKUlvE_clEv(
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
// HOST: store i32 %[[VAL]]
void host_capture_host_ref_by_copy(int *out) {
int &ref = global_host_var;
@@ -82,12 +82,12 @@ void host_capture_host_ref_by_copy(int *out) {
}
// HOST-LABEL: @_ZZ28host_capture_host_ref_by_refPiENKUlvE_clEv(
-// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], %[[T2]]* %this1, i32 0, i32 0
-// HOST: %[[REF:.*]] = load i32*, i32** %[[CAP]]
-// HOST: %[[VAL:.*]] = load i32, i32* %[[REF]]
+// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], ptr %this1, i32 0, i32 0
+// HOST: %[[REF:.*]] = load ptr, ptr %[[CAP]]
+// HOST: %[[VAL:.*]] = load i32, ptr %[[REF]]
// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
-// HOST: store i32 %[[VAL2]], i32* %[[REF]]
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL2]], ptr %[[REF]]
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
// HOST: store i32 %[[VAL]]
void host_capture_host_ref_by_ref(int *out) {
int &ref = global_host_var;
@@ -95,10 +95,10 @@ void host_capture_host_ref_by_ref(int *out) {
}
// HOST-LABEL: define{{.*}} void @_Z8host_refPi(
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
-// HOST: store i32 %[[VAL2]], i32* @global_host_var
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL2]], ptr @global_host_var
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
// HOST: store i32 %[[VAL]]
void host_ref(int *out) {
int &ref = global_host_var;
@@ -107,10 +107,10 @@ void host_ref(int *out) {
}
// HOST-LABEL: @_ZZ15host_lambda_refPiENKUlvE_clEv(
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
-// HOST: store i32 %[[VAL2]], i32* @global_host_var
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL2]], ptr @global_host_var
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
// HOST: store i32 %[[VAL]]
void host_lambda_ref(int *out) {
[=](){
@@ -121,12 +121,12 @@ void host_lambda_ref(int *out) {
}
// HOST-LABEL: define{{.*}} void @_Z28dev_capture_host_ref_by_copyPi(
-// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %{{.*}}, i32 0, i32 1
-// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
-// HOST: store i32 %[[VAL]], i32* %[[CAP]]
+// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], ptr %{{.*}}, i32 0, i32 1
+// HOST: %[[VAL:.*]] = load i32, ptr @global_host_var
+// HOST: store i32 %[[VAL]], ptr %[[CAP]]
// DEV-LABEL: define internal void @_ZZ28dev_capture_host_ref_by_copyPiENKUlvE_clEv(
-// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %this1, i32 0, i32 1
-// DEV: %[[VAL:.*]] = load i32, i32* %[[CAP]]
+// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], ptr %this1, i32 0, i32 1
+// DEV: %[[VAL:.*]] = load i32, ptr %[[CAP]]
// DEV: store i32 %[[VAL]]
void dev_capture_host_ref_by_copy(int *out) {
int &ref = global_host_var;
diff --git a/clang/test/CodeGenCUDA/lambda.cu b/clang/test/CodeGenCUDA/lambda.cu
index c2012dc963558..b620e7d44041c 100644
--- a/clang/test/CodeGenCUDA/lambda.cu
+++ b/clang/test/CodeGenCUDA/lambda.cu
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN: -triple x86_64-linux-gnu \
// RUN: | FileCheck -check-prefix=HOST %s
-// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: | FileCheck -check-prefix=DEV %s
@@ -43,7 +43,7 @@
// DEV-LABEL: define{{.*}} amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_
// DEV: call void @_ZZ12test_capturevENKUlvE_clEv
// DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv
-// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*)
+// DEV: store i32 1, ptr addrspacecast (ptr addrspace(1) @a to ptr)
// Check functions emitted for test_resolve in device compilation.
// Check device version of template function 'overloaded' is emitted and called
diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu
index b722119a21a0f..58bcc410201f3 100644
--- a/clang/test/CodeGenCUDA/launch-bounds.cu
+++ b/clang/test/CodeGenCUDA/launch-bounds.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -no-opaque-pointers %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
#include "Inputs/cuda.h"
@@ -14,8 +14,8 @@ Kernel1()
}
}
-// CHECK: !{{[0-9]+}} = !{void ()* @Kernel1, !"maxntidx", i32 256}
-// CHECK: !{{[0-9]+}} = !{void ()* @Kernel1, !"minctasm", i32 2}
+// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
// Test only max threads per block. Min cta per sm defaults to 0, and
// CodeGen doesn't output a zero value for minctasm.
@@ -27,7 +27,7 @@ Kernel2()
}
}
-// CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{ptr @Kernel2, !"maxntidx", i32 256}
template <int max_threads_per_block>
__global__ void
@@ -37,7 +37,7 @@ Kernel3()
}
template __global__ void Kernel3<MAX_THREADS_PER_BLOCK>();
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
template <int max_threads_per_block, int min_blocks_per_mp>
__global__ void
@@ -47,8 +47,8 @@ Kernel4()
}
template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
const int constint = 100;
template <int max_threads_per_block, int min_blocks_per_mp>
@@ -60,8 +60,8 @@ Kernel5()
}
template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
// Make sure we don't emit negative launch bounds values.
__global__ void
@@ -69,18 +69,18 @@ __launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
Kernel6()
{
}
-// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"maxntidx",
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm",
+// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm",
__global__ void
__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
Kernel7()
{
}
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"maxntidx",
-// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel7{{.*}}, !"minctasm",
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
+// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",
const char constchar = 12;
__global__ void __launch_bounds__(constint, constchar) Kernel8() {}
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
-// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
+// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
diff --git a/clang/test/CodeGenCUDA/llvm-used.cu b/clang/test/CodeGenCUDA/llvm-used.cu
index 521a39f17ee09..c39111dd48036 100644
--- a/clang/test/CodeGenCUDA/llvm-used.cu
+++ b/clang/test/CodeGenCUDA/llvm-used.cu
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s
// Make sure we emit the proper addrspacecast for llvm.used. PR22383 exposed an
// issue where we were generating a bitcast instead of an addrspacecast.
-// CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
+// CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @a to ptr)], section "llvm.metadata"
__attribute__((device)) __attribute__((__used__)) int a[] = {};
diff --git a/clang/test/CodeGenCUDA/printf.cu b/clang/test/CodeGenCUDA/printf.cu
index 56233e46dcf17..396181820dde8 100644
--- a/clang/test/CodeGenCUDA/printf.cu
+++ b/clang/test/CodeGenCUDA/printf.cu
@@ -1,7 +1,7 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \
// RUN: -o - %s | FileCheck %s
#include "Inputs/cuda.h"
@@ -14,20 +14,19 @@ __device__ int CheckSimple() {
// CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]]
// CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
const char* fmt = "%d %lld %f";
- // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 0
- // CHECK: store i32 1, i32* [[PTR0]], align 4
- // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 1
- // CHECK: store i64 2, i64* [[PTR1]], align 8
- // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 2
- // CHECK: store double 3.0{{[^,]*}}, double* [[PTR2]], align 8
- // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]* [[BUF]] to i8*
- // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF_CAST]])
+ // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], ptr [[BUF]], i32 0, i32 0
+ // CHECK: store i32 1, ptr [[PTR0]], align 4
+ // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], ptr [[BUF]], i32 0, i32 1
+ // CHECK: store i64 2, ptr [[PTR1]], align 8
+ // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], ptr [[BUF]], i32 0, i32 2
+ // CHECK: store double 3.0{{[^,]*}}, ptr [[PTR2]], align 8
+ // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(ptr [[FMT]], ptr [[BUF]])
// CHECK: ret i32 [[RET]]
return printf(fmt, 1, 2ll, 3.0);
}
__device__ void CheckNoArgs() {
- // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
+ // CHECK: call i32 @vprintf({{.*}}, ptr null){{$}}
printf("hello, world!");
}
diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu
index 1941245c42384..b7172b7736929 100644
--- a/clang/test/CodeGenCUDA/ptx-kernels.cu
+++ b/clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -2,7 +2,7 @@
// annotations and are added to @llvm.used to prevent their elimination.
// REQUIRES: nvptx-registered-target
//
-// RUN: %clang_cc1 -no-opaque-pointers %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
#include "Inputs/cuda.h"
@@ -31,5 +31,5 @@ void host_function() {
anonymous_ns_kernel<<<0,0>>>();
}
-// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
-// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
diff --git a/clang/test/CodeGenCUDA/redux-builtins.cu b/clang/test/CodeGenCUDA/redux-builtins.cu
index 2b295944c6ad8..a6c83945ab156 100644
--- a/clang/test/CodeGenCUDA/redux-builtins.cu
+++ b/clang/test/CodeGenCUDA/redux-builtins.cu
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 -no-opaque-pointers "-triple" "nvptx-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
-// RUN: %clang_cc1 -no-opaque-pointers "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 "-triple" "nvptx-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx70" "-target-cpu" "sm_80" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
-// CHECK: define{{.*}} void @_Z6kernelPi(i32* noundef %out)
+// CHECK: define{{.*}} void @_Z6kernelPi(ptr noundef %out)
__attribute__((global)) void kernel(int *out) {
int a = 1;
unsigned int b = 5;
More information about the cfe-commits
mailing list