[clang] 7cb3005 - AMDGPU: Add dereferenceable attribute to dispatch ptr intrinsic (#185955)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 11 23:28:45 PDT 2026
Author: Matt Arsenault
Date: 2026-03-12T07:28:39+01:00
New Revision: 7cb3005ba22899b8ab7efe4fe43cba09cb7e12d4
URL: https://github.com/llvm/llvm-project/commit/7cb3005ba22899b8ab7efe4fe43cba09cb7e12d4
DIFF: https://github.com/llvm/llvm-project/commit/7cb3005ba22899b8ab7efe4fe43cba09cb7e12d4.diff
LOG: AMDGPU: Add dereferenceable attribute to dispatch ptr intrinsic (#185955)
Stop manually setting it on the callsite in clang.
Added:
Modified:
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
clang/test/CodeGen/amdgpu-abi-version.c
clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
clang/test/CodeGenCUDA/builtins-amdgcn.cu
clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/Headers/gpuintrin.c
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 4258bfeea1c35..0d572d37ab972 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -56,9 +56,6 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
const CallExpr *E = nullptr) {
auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
auto *Call = CGF.Builder.CreateCall(F);
- Call->addRetAttr(
- Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
- Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
if (!E)
return Call;
QualType BuiltinRetType = E->getType();
diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c
index 2cfab3e8e3e0c..ae67aa405f4bc 100644
--- a/clang/test/CodeGen/amdgpu-abi-version.c
+++ b/clang/test/CodeGen/amdgpu-abi-version.c
@@ -19,7 +19,7 @@
// LLVM-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP2]], i32 [[TMP7]]
// LLVM-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
// LLVM-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
-// LLVM-NEXT: [[TMP11:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LLVM-NEXT: [[TMP11:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// LLVM-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP11]], i64 4
// LLVM-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG2]], !invariant.load [[META1]], !noundef [[META1]]
// LLVM-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index b7f597b989242..782728c1e0ae0 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -31,7 +31,7 @@
// PRECOV5-NEXT: i32 2, label %[[SW_BB2:.*]]
// PRECOV5-NEXT: ]
// PRECOV5: [[SW_BB]]:
-// PRECOV5-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// PRECOV5-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// PRECOV5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP1]], i64 4
// PRECOV5-NEXT: [[TMP3:%.*]] = load i16, ptr addrspace(4) [[TMP2]], align 2, !range [[RNG3:![0-9]+]], !invariant.load [[META4:![0-9]+]], !noundef [[META4]]
// PRECOV5-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
@@ -39,7 +39,7 @@
// PRECOV5-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
// PRECOV5-NEXT: br label %[[SW_EPILOG:.*]]
// PRECOV5: [[SW_BB1]]:
-// PRECOV5-NEXT: [[TMP6:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// PRECOV5-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// PRECOV5-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP6]], i64 6
// PRECOV5-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]]
// PRECOV5-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32
@@ -47,7 +47,7 @@
// PRECOV5-NEXT: store i32 [[TMP9]], ptr [[TMP10]], align 4
// PRECOV5-NEXT: br label %[[SW_EPILOG]]
// PRECOV5: [[SW_BB2]]:
-// PRECOV5-NEXT: [[TMP11:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// PRECOV5-NEXT: [[TMP11:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// PRECOV5-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP11]], i64 8
// PRECOV5-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG3]], !invariant.load [[META4]], !noundef [[META4]]
// PRECOV5-NEXT: [[TMP14:%.*]] = zext i16 [[TMP13]] to i32
diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
index 4bf23e529c7a5..7edf64db91f2e 100644
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -20,7 +20,7 @@
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
// CHECK-NEXT: store ptr [[TMP1]], ptr [[DISPATCH_PTR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8
@@ -150,7 +150,7 @@ __global__ void test_ds_fmin(float src, float *shared) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
-// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
// CHECK-NEXT: store ptr [[TMP1]], ptr [[X_ASCAST]], align 8
// CHECK-NEXT: ret void
@@ -241,7 +241,7 @@ __device__ void func(float *x);
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP3]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
+// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR8:[0-9]+]]
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
diff --git a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
index 1cbe358910b85..677fcd761760d 100644
--- a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
@@ -20,7 +20,7 @@
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
@@ -232,7 +232,7 @@ __device__ void func(float *x);
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl
index 9b4cdfa08176f..4e64f1127a912 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-workgroup-size.cl
@@ -33,7 +33,7 @@
// NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_x(
// NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// NONUNIFORM-V4-NEXT: [[ENTRY:.*:]]
-// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7:![0-9]+]], !invariant.load [[META8:![0-9]+]], !noundef [[META8]]
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -48,7 +48,7 @@
// UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_x(
// UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// UNIFORM-V4-NEXT: [[ENTRY:.*:]]
-// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7:![0-9]+]], !invariant.load [[META8:![0-9]+]], !noundef [[META8]]
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -67,7 +67,7 @@
// NONUNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG7:![0-9]+]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = zext nneg i16 [[TMP8]] to i32
-// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 4
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32
@@ -87,7 +87,7 @@
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 12
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG6:![0-9]+]], !invariant.load [[META7:![0-9]+]], !noundef [[META7]]
-// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 4
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
@@ -125,7 +125,7 @@ unsigned int test_get_workgroup_size_x()
// NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_y(
// NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// NONUNIFORM-V4-NEXT: [[ENTRY:.*:]]
-// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 6
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -140,7 +140,7 @@ unsigned int test_get_workgroup_size_x()
// UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_y(
// UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0]] {
// UNIFORM-V4-NEXT: [[ENTRY:.*:]]
-// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 6
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -160,7 +160,7 @@ unsigned int test_get_workgroup_size_x()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP7]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = zext nneg i16 [[TMP9]] to i32
-// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP11]], i64 6
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = zext nneg i16 [[TMP13]] to i32
@@ -180,7 +180,7 @@ unsigned int test_get_workgroup_size_x()
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 14
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
-// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 6
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
@@ -218,7 +218,7 @@ unsigned int test_get_workgroup_size_y()
// NONUNIFORM-V4-LABEL: define dso_local range(i32 0, 1025) i32 @test_get_workgroup_size_z(
// NONUNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] {
// NONUNIFORM-V4-NEXT: [[ENTRY:.*:]]
-// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -233,7 +233,7 @@ unsigned int test_get_workgroup_size_y()
// UNIFORM-V4-LABEL: define dso_local range(i32 1, 1025) i32 @test_get_workgroup_size_z(
// UNIFORM-V4-SAME: ) local_unnamed_addr #[[ATTR0]] {
// UNIFORM-V4-NEXT: [[ENTRY:.*:]]
-// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// UNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -253,7 +253,7 @@ unsigned int test_get_workgroup_size_y()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP7]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = load i16, ptr addrspace(4) [[TMP8]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = zext nneg i16 [[TMP9]] to i32
-// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP11]], i64 8
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(4) [[TMP12]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = zext nneg i16 [[TMP13]] to i32
@@ -273,7 +273,7 @@ unsigned int test_get_workgroup_size_y()
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 16
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 8, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
-// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 8
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
@@ -368,7 +368,7 @@ unsigned int test_get_workgroup_size_z()
// NONUNIFORM-V4-NEXT: i32 2, label %[[SW_BB2:.*]]
// NONUNIFORM-V4-NEXT: ]
// NONUNIFORM-V4: [[SW_BB]]:
-// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
// NONUNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// NONUNIFORM-V4-NEXT: [[TMP3:%.*]] = zext nneg i16 [[TMP2]] to i32
@@ -381,7 +381,7 @@ unsigned int test_get_workgroup_size_z()
// NONUNIFORM-V4-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP9]], 1
// NONUNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
// NONUNIFORM-V4: [[SW_BB1]]:
-// NONUNIFORM-V4-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-V4-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-V4-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 6
// NONUNIFORM-V4-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// NONUNIFORM-V4-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32
@@ -393,7 +393,7 @@ unsigned int test_get_workgroup_size_z()
// NONUNIFORM-V4-NEXT: [[TMP19:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP18]], i32 [[TMP13]])
// NONUNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
// NONUNIFORM-V4: [[SW_BB2]]:
-// NONUNIFORM-V4-NEXT: [[TMP20:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-V4-NEXT: [[TMP20:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-V4-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP20]], i64 8
// NONUNIFORM-V4-NEXT: [[TMP22:%.*]] = load i16, ptr addrspace(4) [[TMP21]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// NONUNIFORM-V4-NEXT: [[TMP23:%.*]] = zext nneg i16 [[TMP22]] to i32
@@ -418,18 +418,18 @@ unsigned int test_get_workgroup_size_z()
// UNIFORM-V4-NEXT: i32 2, label %[[SW_BB2:.*]]
// UNIFORM-V4-NEXT: ]
// UNIFORM-V4: [[SW_BB]]:
-// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-V4-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-V4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 4
// UNIFORM-V4-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// UNIFORM-V4-NEXT: [[NARROW:%.*]] = add nuw nsw i16 [[TMP2]], 1
// UNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
// UNIFORM-V4: [[SW_BB1]]:
-// UNIFORM-V4-NEXT: [[TMP3:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-V4-NEXT: [[TMP3:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-V4-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP3]], i64 6
// UNIFORM-V4-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[TMP4]], align 2, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// UNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
// UNIFORM-V4: [[SW_BB2]]:
-// UNIFORM-V4-NEXT: [[TMP6:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-V4-NEXT: [[TMP6:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-V4-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP6]], i64 8
// UNIFORM-V4-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 4, !range [[RNG7]], !invariant.load [[META8]], !noundef [[META8]]
// UNIFORM-V4-NEXT: br label %[[SW_EPILOG]]
@@ -458,7 +458,7 @@ unsigned int test_get_workgroup_size_z()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 [[TMP6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP8:%.*]] = load i16, ptr addrspace(4) [[TMP7]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP9:%.*]] = zext nneg i16 [[TMP8]] to i32
-// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 4
// NONUNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = zext nneg i16 [[TMP12]] to i32
@@ -482,7 +482,7 @@ unsigned int test_get_workgroup_size_z()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP22]], i64 [[TMP27]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP29:%.*]] = load i16, ptr addrspace(4) [[TMP28]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP30:%.*]] = zext nneg i16 [[TMP29]] to i32
-// NONUNIFORM-UNKNOWN-NEXT: [[TMP31:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-UNKNOWN-NEXT: [[TMP31:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP31]], i64 6
// NONUNIFORM-UNKNOWN-NEXT: [[TMP33:%.*]] = load i16, ptr addrspace(4) [[TMP32]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP34:%.*]] = zext nneg i16 [[TMP33]] to i32
@@ -505,7 +505,7 @@ unsigned int test_get_workgroup_size_z()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP43]], i64 [[TMP48]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP50:%.*]] = load i16, ptr addrspace(4) [[TMP49]], align 2, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP51:%.*]] = zext nneg i16 [[TMP50]] to i32
-// NONUNIFORM-UNKNOWN-NEXT: [[TMP52:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// NONUNIFORM-UNKNOWN-NEXT: [[TMP52:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// NONUNIFORM-UNKNOWN-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP52]], i64 8
// NONUNIFORM-UNKNOWN-NEXT: [[TMP54:%.*]] = load i16, ptr addrspace(4) [[TMP53]], align 4, !range [[RNG7]], !invariant.load [[META6]], !noundef [[META6]]
// NONUNIFORM-UNKNOWN-NEXT: [[TMP55:%.*]] = zext nneg i16 [[TMP54]] to i32
@@ -535,7 +535,7 @@ unsigned int test_get_workgroup_size_z()
// UNIFORM-UNKNOWN-NEXT: [[TMP2:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP2]], i64 12
// UNIFORM-UNKNOWN-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
-// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-UNKNOWN-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP5]], i64 4
// UNIFORM-UNKNOWN-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
// UNIFORM-UNKNOWN-NEXT: [[DOTV7:%.*]] = select i1 [[TMP1]], i16 [[TMP4]], i16 [[TMP7]]
@@ -547,7 +547,7 @@ unsigned int test_get_workgroup_size_z()
// UNIFORM-UNKNOWN-NEXT: [[TMP10:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP10]], i64 14
// UNIFORM-UNKNOWN-NEXT: [[TMP12:%.*]] = load i16, ptr addrspace(4) [[TMP11]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
-// UNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-UNKNOWN-NEXT: [[TMP13:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP13]], i64 6
// UNIFORM-UNKNOWN-NEXT: [[TMP15:%.*]] = load i16, ptr addrspace(4) [[TMP14]], align 2, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
// UNIFORM-UNKNOWN-NEXT: [[DOTV6:%.*]] = select i1 [[TMP9]], i16 [[TMP12]], i16 [[TMP15]]
@@ -558,7 +558,7 @@ unsigned int test_get_workgroup_size_z()
// UNIFORM-UNKNOWN-NEXT: [[TMP18:%.*]] = tail call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP18]], i64 16
// UNIFORM-UNKNOWN-NEXT: [[TMP20:%.*]] = load i16, ptr addrspace(4) [[TMP19]], align 8, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
-// UNIFORM-UNKNOWN-NEXT: [[TMP21:%.*]] = tail call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// UNIFORM-UNKNOWN-NEXT: [[TMP21:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// UNIFORM-UNKNOWN-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP21]], i64 8
// UNIFORM-UNKNOWN-NEXT: [[TMP23:%.*]] = load i16, ptr addrspace(4) [[TMP22]], align 4, !range [[RNG6]], !invariant.load [[META7]], !noundef [[META7]]
// UNIFORM-UNKNOWN-NEXT: [[DOTV:%.*]] = select i1 [[TMP17]], i16 [[TMP20]], i16 [[TMP23]]
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index dc5333c92d439..f4e2676212f3d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -1051,7 +1051,7 @@ void test_read_exec_hi(global uint* out) {
}
// CHECK-LABEL: @test_dispatch_ptr
-// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: {{.*}}call{{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
#if !defined(__SPIRV__)
void test_dispatch_ptr(__constant unsigned char ** out)
#else
@@ -1138,7 +1138,7 @@ void test_get_local_id(int d, global int *out)
// CHECK: declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
// CHECK-LABEL: @test_get_grid_size(
-// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: {{.*}}call{{.*}}ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !range [[$GRID_RANGE:![0-9]+]], !invariant.load
void test_get_grid_size(int d, global int *out)
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index c6a20dec210bb..3c4fcfc2bd43d 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -84,7 +84,7 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_x(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]]
// AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -103,7 +103,7 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_y(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 16
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2]], !invariant.load [[META3]]
// AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -122,7 +122,7 @@ __gpu_kernel void foo() {
// AMDGPU-LABEL: define internal i32 @__gpu_num_blocks_z(
// AMDGPU-SAME: ) #[[ATTR0]] {
// AMDGPU-NEXT: [[ENTRY:.*:]]
-// AMDGPU-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// AMDGPU-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGPU-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 20
// AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !range [[RNG2]], !invariant.load [[META3]]
// AMDGPU-NEXT: [[TMP3:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9101666c2a49c..3331072a1cb2a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -180,7 +180,7 @@ def int_amdgcn_cluster_workgroup_max_flat_id:
def int_amdgcn_dispatch_ptr :
DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
- [Align<RetIndex, 4>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>;
+ [Align<RetIndex, 4>, Dereferenceable<RetIndex, 64>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_queue_ptr :
ClangBuiltin<"__builtin_amdgcn_queue_ptr">,
diff --git a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
index 63d0381ad1fd1..d04d591943023 100644
--- a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
+++ b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll
@@ -2,41 +2,47 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
; Test assumed alignment parameter
+; CHECK: declare noundef nonnull align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK: declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #0
+define ptr addrspace(4) @dispatch_ptr() {
+ %ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+ ret ptr addrspace(4) %ptr
+}
+; CHECK: declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #1
define i32 @ds_append(ptr addrspace(3) %ptr) {
%ret = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %ptr, i1 false)
ret i32 %ret
}
; Test assumed alignment parameter
-; CHECK: declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #0
+; CHECK: declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) align 4 captures(none), i1 immarg) #1
define i32 @ds_consume(ptr addrspace(3) %ptr) {
%ret = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %ptr, i1 false)
ret i32 %ret
}
-; CHECK: declare void @llvm.amdgcn.s.wait.event(i16 immarg) #1
+; CHECK: declare void @llvm.amdgcn.s.wait.event(i16 immarg) #2
define void @s_wait_event() {
call void @llvm.amdgcn.s.wait.event(i16 0)
ret void
}
-; CHECK: declare void @llvm.amdgcn.s.wait.event.export.ready() #1
+; CHECK: declare void @llvm.amdgcn.s.wait.event.export.ready() #2
define void @s_wait_event_export_ready() {
call void @llvm.amdgcn.s.wait.event.export.ready()
ret void
}
; Test assumed range
-; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #2
+; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #0
define i32 @wavefrontsize() {
%ret = call i32 @llvm.amdgcn.wavefrontsize()
ret i32 %ret
}
-; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; CHECK: attributes #1 = { nocallback nofree nounwind willreturn }
-; CHNCK: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #1 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #2 = { nocallback nofree nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll
index 83ee7cba567d5..36945665ecfd1 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll
@@ -155,7 +155,7 @@ define i32 @bad_offset() {
; CHECK-LABEL: define i32 @bad_offset() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
+; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 16
; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[D_GEP_Y]], align 4
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
@@ -180,7 +180,7 @@ define i32 @dangling() {
; CHECK-LABEL: define i32 @dangling() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
+; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT: ret i32 [[GRID_SIZE_X]]
;
@@ -199,7 +199,7 @@ define i32 @wrong_cast() {
; CHECK-LABEL: define i32 @wrong_cast() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
+; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
@@ -224,7 +224,7 @@ define i32 @wrong_size() {
; CHECK-LABEL: define i32 @wrong_size() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
+; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
@@ -274,7 +274,7 @@ define i16 @empty_use() {
; CHECK-LABEL: define i16 @empty_use() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
+; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 12
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
; CHECK-NEXT: [[TRUNC_X:%.*]] = trunc i32 [[GRID_SIZE_X]] to i16
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
More information about the cfe-commits
mailing list