[llvm] [Attributor] Use known non-flat AS before `getAssumedAddrSpace` (PR #143221)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 6 18:06:35 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

If the underlying object already has a non-flat address space, we simply use
that before calling `getAssumedAddrSpace`.

---

Patch is 38.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143221.diff


7 Files Affected:

- (modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+25-6) 
- (modified) llvm/test/CodeGen/AMDGPU/aa-as-infer.ll (+25) 
- (modified) llvm/test/Transforms/OpenMP/nested_parallelism.ll (+18-10) 
- (modified) llvm/test/Transforms/OpenMP/remove_globalization.ll (+22-12) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_guarding.ll (+52-48) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll (+10) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll (+10) 


``````````diff
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1e908b76d1814..cabd32fe0cf7d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12592,19 +12592,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   ChangeStatus updateImpl(Attributor &A) override {
     uint32_t OldAddressSpace = AssumedAddressSpace;
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
 
     auto CheckAddressSpace = [&](Value &Obj) {
-      if (isa<UndefValue>(&Obj))
+      // Ignore undef and poison.
+      if (isa<UndefValue>(&Obj) || isa<PoisonValue>(&Obj))
         return true;
-      if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+
+      // If the object already has a non-flat address space, we simply take it.
+      unsigned ObjAS = Obj.getType()->getPointerAddressSpace();
+      if (ObjAS != FlatAS)
+        return takeAddressSpace(ObjAS);
+
+      // At this point, we know Obj is in the flat address space. For a final
+      // attempt, we want to use 'getAssumedAddrSpace', but first we must get
+      // the associated function, if possible.
+      Function *F = nullptr;
+      if (auto *Arg = dyn_cast<Argument>(&Obj))
+        F = Arg->getParent();
+      else if (auto *I = dyn_cast<Instruction>(&Obj))
+        F = I->getFunction();
+
+      // Use getAssumedAddrSpace if the associated function exists.
+      if (F) {
         auto *TTI =
-            A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
-                *Arg->getParent());
-        unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
+            A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(*F);
+        unsigned AssumedAS = TTI->getAssumedAddrSpace(&Obj);
         if (AssumedAS != ~0U)
           return takeAddressSpace(AssumedAS);
       }
-      return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+
+      // Now we can't do anything else but to take the flat AS.
+      return takeAddressSpace(FlatAS);
     };
 
     auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index cc2c80060231c..78766b4e8eb08 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -276,3 +276,28 @@ define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val)
   call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
   ret void
 }
+
+define amdgpu_kernel void @kernel_argument_with_known_as(ptr addrspace(1) %p1, ptr addrspace(3) %p3, i32 %val) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_argument_with_known_as(
+; CHECK-SAME: ptr addrspace(1) [[P1:%.*]], ptr addrspace(3) [[P3:%.*]], i32 [[VAL:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[P1_CAST:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
+; CHECK-NEXT:    [[P3_CAST:%.*]] = addrspacecast ptr addrspace(3) [[P3]] to ptr
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT:    [[P:%.*]] = select i1 [[B]], ptr [[P1_CAST]], ptr [[P3_CAST]]
+; CHECK-NEXT:    [[ATOMIC_ADD:%.*]] = atomicrmw add ptr [[P]], i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0:![0-9]+]], !amdgpu.no.fine.grained.memory [[META1:![0-9]+]], !amdgpu.no.remote.memory [[META1]]
+; CHECK-NEXT:    ret void
+;
+  %p1.cast = addrspacecast ptr addrspace(1) %p1 to ptr
+  %p3.cast = addrspacecast ptr addrspace(3) %p3 to ptr
+  %b = icmp eq i32 %val, 0
+  %p = select i1 %b, ptr %p1.cast, ptr %p3.cast
+  %atomic.add = atomicrmw add ptr %p, i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1
+  ret void
+}
+
+!0 = !{i32 5, i32 6}
+!1 = !{}
+;.
+; CHECK: [[META0]] = !{i32 5, i32 6}
+; CHECK: [[META1]] = !{}
+;.
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 1679a27fdae8b..834c623d27695 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -64,7 +64,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13
 ; CHECK-NEXT:    br label [[_Z3FOOI_INTERNALIZED_EXIT]]
 ; CHECK:       _Z3fooi.internalized.exit:
 ; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr addrspace(5) [[TMP4]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
 ; CHECK-NEXT:    call void @__kmpc_target_deinit()
@@ -109,7 +110,8 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT:    store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
 ; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    ret void
@@ -141,7 +143,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
 ; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
-; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr addrspace(5) [[TMP2]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
 ; CHECK-NEXT:    call void @__kmpc_target_deinit()
@@ -175,7 +178,8 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT:    store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
 ; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    ret void
@@ -202,7 +206,8 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[I_I]], align 16
-; CHECK-NEXT:    store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
 ; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
@@ -228,15 +233,17 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
 ; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
 ; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
 ; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    store i32 [[TMP4]], ptr [[I_I_I]], align 16
-; CHECK-NEXT:    store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
-; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
+; CHECK-NEXT:    [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
 ; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
 ; CHECK-NEXT:    ret void
@@ -287,7 +294,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
 ; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 29f2030c4d42b..9966c33f0c91b 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -163,17 +163,22 @@ define internal void @convert_and_move_alloca() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
 ; CHECK-NEXT:    [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
 ; CHECK-NEXT:    br label [[INITLOOP:%.*]]
 ; CHECK:       initloop:
-; CHECK-NEXT:    store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT:    store i32 0, ptr addrspace(5) [[TMP1]], align 4
 ; CHECK-NEXT:    br label [[LOOPBODY:%.*]]
 ; CHECK:       loopbody:
-; CHECK-NEXT:    [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT:    [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-NEXT:    br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
 ; CHECK:       loopinc:
 ; CHECK-NEXT:    [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT:    store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
 ; CHECK-NEXT:    br label [[LOOPBODY]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
@@ -183,17 +188,22 @@ define internal void @convert_and_move_alloca() {
 ; CHECK-DISABLED-NEXT:  entry:
 ; CHECK-DISABLED-NEXT:    [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
 ; CHECK-DISABLED-NEXT:    [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT:    [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
 ; CHECK-DISABLED-NEXT:    br label [[INITLOOP:%.*]]
 ; CHECK-DISABLED:       initloop:
-; CHECK-DISABLED-NEXT:    store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT:    store i32 0, ptr addrspace(5) [[TMP1]], align 4
 ; CHECK-DISABLED-NEXT:    br label [[LOOPBODY:%.*]]
 ; CHECK-DISABLED:       loopbody:
-; CHECK-DISABLED-NEXT:    [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-DISABLED-NEXT:    br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-DISABLED-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT:    [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-DISABLED-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-DISABLED-NEXT:    br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
 ; CHECK-DISABLED:       loopinc:
 ; CHECK-DISABLED-NEXT:    [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-DISABLED-NEXT:    store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT:    store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
 ; CHECK-DISABLED-NEXT:    br label [[LOOPBODY]]
 ; CHECK-DISABLED:       exit:
 ; CHECK-DISABLED-NEXT:    ret void
@@ -268,7 +278,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind }
 ;.
 ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
 ; CHECK: [[META2]] = !{}
 ; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
 ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -279,7 +289,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
 ;.
 ; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK-DISABLED: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
 ; CHECK-DISABLED: [[META2]] = !{}
 ; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
 ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 809fc39442f91..d057e5b233e87 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -71,28 +71,29 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK:       user_code.entry:
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
-; CHECK-NEXT:    store ptr [[SELECT]], ptr [[LOC]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[LOC]] to ptr addrspace(5)
+; CHECK-NEXT:    store ptr [[SELECT]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
 ; CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[N]], 32
 ; CHECK-NEXT:    [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
 ; CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; CHECK:       region.check.tid:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
 ; CHECK-NEXT:    store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
-; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP3]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
 ; CHECK-NEXT:    call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
@@ -108,19 +109,20 @@ def...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/143221


More information about the llvm-commits mailing list