[llvm] [Attributor] Use known non-flat AS before `getAssumedAddrSpace` (PR #143221)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 6 18:06:35 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
If the underlying object already has a known non-flat address space, we simply
use it directly; `getAssumedAddrSpace` is consulted only as a fallback when the
object is in the flat address space.
---
Patch is 38.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143221.diff
7 Files Affected:
- (modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+25-6)
- (modified) llvm/test/CodeGen/AMDGPU/aa-as-infer.ll (+25)
- (modified) llvm/test/Transforms/OpenMP/nested_parallelism.ll (+18-10)
- (modified) llvm/test/Transforms/OpenMP/remove_globalization.ll (+22-12)
- (modified) llvm/test/Transforms/OpenMP/spmdization_guarding.ll (+52-48)
- (modified) llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll (+10)
- (modified) llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll (+10)
``````````diff
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1e908b76d1814..cabd32fe0cf7d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12592,19 +12592,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
ChangeStatus updateImpl(Attributor &A) override {
uint32_t OldAddressSpace = AssumedAddressSpace;
+ unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
auto CheckAddressSpace = [&](Value &Obj) {
- if (isa<UndefValue>(&Obj))
+ // Ignore undef and poison.
+ if (isa<UndefValue>(&Obj) || isa<PoisonValue>(&Obj))
return true;
- if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+
+ // If the object already has a non-flat address space, we simply take it.
+ unsigned ObjAS = Obj.getType()->getPointerAddressSpace();
+ if (ObjAS != FlatAS)
+ return takeAddressSpace(ObjAS);
+
+ // At this point, we know Obj is in the flat address space. For a final
+ // attempt, we want to use 'getAssumedAddrSpace', but first we must get
+ // the associated function, if possible.
+ Function *F = nullptr;
+ if (auto *Arg = dyn_cast<Argument>(&Obj))
+ F = Arg->getParent();
+ else if (auto *I = dyn_cast<Instruction>(&Obj))
+ F = I->getFunction();
+
+ // Use getAssumedAddrSpace if the associated function exists.
+ if (F) {
auto *TTI =
- A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
- *Arg->getParent());
- unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
+ A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(*F);
+ unsigned AssumedAS = TTI->getAssumedAddrSpace(&Obj);
if (AssumedAS != ~0U)
return takeAddressSpace(AssumedAS);
}
- return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+
+ // Now we can't do anything else but to take the flat AS.
+ return takeAddressSpace(FlatAS);
};
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index cc2c80060231c..78766b4e8eb08 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -276,3 +276,28 @@ define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val)
call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
ret void
}
+
+define amdgpu_kernel void @kernel_argument_with_known_as(ptr addrspace(1) %p1, ptr addrspace(3) %p3, i32 %val) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_argument_with_known_as(
+; CHECK-SAME: ptr addrspace(1) [[P1:%.*]], ptr addrspace(3) [[P3:%.*]], i32 [[VAL:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[P1_CAST:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
+; CHECK-NEXT: [[P3_CAST:%.*]] = addrspacecast ptr addrspace(3) [[P3]] to ptr
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[B]], ptr [[P1_CAST]], ptr [[P3_CAST]]
+; CHECK-NEXT: [[ATOMIC_ADD:%.*]] = atomicrmw add ptr [[P]], i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0:![0-9]+]], !amdgpu.no.fine.grained.memory [[META1:![0-9]+]], !amdgpu.no.remote.memory [[META1]]
+; CHECK-NEXT: ret void
+;
+ %p1.cast = addrspacecast ptr addrspace(1) %p1 to ptr
+ %p3.cast = addrspacecast ptr addrspace(3) %p3 to ptr
+ %b = icmp eq i32 %val, 0
+ %p = select i1 %b, ptr %p1.cast, ptr %p3.cast
+ %atomic.add = atomicrmw add ptr %p, i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1
+ ret void
+}
+
+!0 = !{i32 5, i32 6}
+!1 = !{}
+;.
+; CHECK: [[META0]] = !{i32 5, i32 6}
+; CHECK: [[META1]] = !{}
+;.
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 1679a27fdae8b..834c623d27695 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -64,7 +64,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
; CHECK: _Z3fooi.internalized.exit:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr addrspace(5) [[TMP4]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -109,7 +110,8 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -141,7 +143,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -175,7 +178,8 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -202,7 +206,8 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP0]], ptr [[I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
@@ -228,15 +233,17 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP4]], ptr [[I_I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
-; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8
+; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
; CHECK-NEXT: ret void
@@ -287,7 +294,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 29f2030c4d42b..9966c33f0c91b 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -163,17 +163,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
-; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: br label [[LOOPBODY:%.*]]
; CHECK: loopbody:
-; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK: loopinc:
; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-NEXT: br label [[LOOPBODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -183,17 +188,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
-; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
; CHECK-DISABLED: loopbody:
-; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-DISABLED-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK-DISABLED: loopinc:
; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY]]
; CHECK-DISABLED: exit:
; CHECK-DISABLED-NEXT: ret void
@@ -268,7 +278,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
; CHECK: [[META2]] = !{}
; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -279,7 +289,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK-DISABLED: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
; CHECK-DISABLED: [[META2]] = !{}
; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 809fc39442f91..d057e5b233e87 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -71,28 +71,29 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK: user_code.entry:
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
-; CHECK-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[LOC]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[SELECT]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP3]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
@@ -108,19 +109,20 @@ def...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/143221
More information about the llvm-commits mailing list