[clang] [NFC][Clang][OpenMP] Automatically generate check lines for `clang/test/OpenMP/amdgcn-attributes.cpp` (PR #176660)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Jan 18 09:36:10 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Shilei Tian (shiltian)
---
Patch is 27.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176660.diff
1 File Affected:
- (modified) clang/test/OpenMP/amdgcn-attributes.cpp (+356-10)
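For anyone reproducing this locally: check lines like the ones below are normally regenerated by running update_cc_test_checks.py from an LLVM build tree, roughly as follows (the build directory path is illustrative):

```
python llvm/utils/update_cc_test_checks.py \
    --llvm-bin=build/bin \
    clang/test/OpenMP/amdgcn-attributes.cpp
```

On subsequent runs the script picks up the UTC_ARGS recorded in the test's NOTE line (here --check-attributes, --include-generated-funcs, the --replace-value-regex for the offloading mangled names, and --prefix-filecheck-ir-name), so the extra flags only need to be passed once.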
``````````diff
diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index 03f5c31e3157c..39f189eee87d3 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" --prefix-filecheck-ir-name VAR --version 6
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
@@ -14,7 +15,6 @@ int callable(int);
// Check that the target attributes are set on the generated kernel
int func() {
- // ALL-LABEL: amdgpu_kernel void @__omp_offloading{{.*}} #0
int arr[N];
@@ -27,14 +27,360 @@ int func() {
}
int callable(int x) {
- // ALL-LABEL: @_Z8callablei(i32 noundef %x) #2
return x + 1;
}
-
-// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" }
-// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-
-// DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
-// NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// DEFAULT: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// DEFAULT-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// DEFAULT-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEFAULT-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// DEFAULT-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// DEFAULT-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// DEFAULT-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// DEFAULT-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// DEFAULT-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEFAULT-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// DEFAULT: [[USER_CODE_ENTRY]]:
+// DEFAULT-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// DEFAULT-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// DEFAULT-NEXT: call void @__kmpc_target_deinit()
+// DEFAULT-NEXT: ret void
+// DEFAULT: [[WORKER_EXIT]]:
+// DEFAULT-NEXT: ret void
+//
+//
+// DEFAULT: Function Attrs: convergent noinline norecurse nounwind optnone
+// DEFAULT-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// DEFAULT-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// DEFAULT-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// DEFAULT-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// DEFAULT-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// DEFAULT-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: br label %[[FOR_COND:.*]]
+// DEFAULT: [[FOR_COND]]:
+// DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// DEFAULT-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// DEFAULT: [[FOR_BODY]]:
+// DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// DEFAULT-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// DEFAULT-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// DEFAULT-NEXT: br label %[[FOR_INC:.*]]
+// DEFAULT: [[FOR_INC]]:
+// DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// DEFAULT-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// DEFAULT: [[FOR_END]]:
+// DEFAULT-NEXT: ret void
+//
+//
+// DEFAULT: Function Attrs: convergent mustprogress noinline nounwind optnone
+// DEFAULT-LABEL: define hidden noundef i32 @_Z8callablei(
+// DEFAULT-SAME: i32 noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEFAULT-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// DEFAULT-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4
+// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
+// DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// DEFAULT-NEXT: ret i32 [[ADD]]
+//
+//
+// ALL: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// ALL-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// ALL-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// ALL-NEXT: [[ENTRY:.*:]]
+// ALL-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// ALL-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// ALL-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// ALL-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// ALL-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// ALL-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// ALL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// ALL-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// ALL: [[USER_CODE_ENTRY]]:
+// ALL-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// ALL-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// ALL-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// ALL-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// ALL-NEXT: call void @__kmpc_target_deinit()
+// ALL-NEXT: ret void
+// ALL: [[WORKER_EXIT]]:
+// ALL-NEXT: ret void
+//
+//
+// ALL: Function Attrs: convergent noinline norecurse nounwind optnone
+// ALL-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// ALL-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// ALL-NEXT: [[ENTRY:.*:]]
+// ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// ALL-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// ALL-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// ALL-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// ALL-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// ALL-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// ALL-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// ALL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// ALL-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: br label %[[FOR_COND:.*]]
+// ALL: [[FOR_COND]]:
+// ALL-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// ALL-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// ALL: [[FOR_BODY]]:
+// ALL-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// ALL-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// ALL-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// ALL-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// ALL-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// ALL-NEXT: br label %[[FOR_INC:.*]]
+// ALL: [[FOR_INC]]:
+// ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// ALL-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// ALL-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// ALL: [[FOR_END]]:
+// ALL-NEXT: ret void
+//
+//
+// ALL: Function Attrs: convergent mustprogress noinline nounwind optnone
+// ALL-LABEL: define hidden noundef i32 @_Z8callablei(
+// ALL-SAME: i32 noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+// ALL-NEXT: [[ENTRY:.*:]]
+// ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// ALL-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// ALL-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4
+// ALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
+// ALL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// ALL-NEXT: ret i32 [[ADD]]
+//
+//
+// CPU: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CPU-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// CPU-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPU-NEXT: [[ENTRY:.*:]]
+// CPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CPU-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// CPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CPU-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// CPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// CPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// CPU: [[USER_CODE_ENTRY]]:
+// CPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// CPU-NEXT: call void @__kmpc_target_deinit()
+// CPU-NEXT: ret void
+// CPU: [[WORKER_EXIT]]:
+// CPU-NEXT: ret void
+//
+//
+// CPU: Function Attrs: convergent noinline norecurse nounwind optnone
+// CPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// CPU-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CPU-NEXT: [[ENTRY:.*:]]
+// CPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CPU-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// CPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CPU-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// CPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// CPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: br label %[[FOR_COND:.*]]
+// CPU: [[FOR_COND]]:
+// CPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// CPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CPU: [[FOR_BODY]]:
+// CPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// CPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// CPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// CPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// CPU-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// CPU-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// CPU-NEXT: br label %[[FOR_INC:.*]]
+// CPU: [[FOR_INC]]:
+// CPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// CPU-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// CPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CPU: [[FOR_END]]:
+// CPU-NEXT: ret void
+//
+//
+// CPU: Function Attrs: convergent mustprogress noinline nounwind optnone
+// CPU-LABEL: define hidden...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/176660