[llvm] 37ea3b3 - Revert "Reapply "[AMDGPU] Make `getAssumedAddrSpace` return AS1 for pointer kernel arguments (#137488)""

Shilei Tian via llvm-commits <llvm-commits at lists.llvm.org>
Fri May 30 19:06:21 PDT 2025


Author: Shilei Tian
Date: 2025-05-30T22:06:16-04:00
New Revision: 37ea3b32cdcb6c0dcecbcc4bf844f5190c7378dd

URL: https://github.com/llvm/llvm-project/commit/37ea3b32cdcb6c0dcecbcc4bf844f5190c7378dd
DIFF: https://github.com/llvm/llvm-project/commit/37ea3b32cdcb6c0dcecbcc4bf844f5190c7378dd.diff

LOG: Revert "Reapply "[AMDGPU] Make `getAssumedAddrSpace` return AS1 for pointer kernel arguments (#137488)""

This reverts commit 4efc13f8ff1eaf4f9fb1fcea8d4552b3eca052ca.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
    llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
    llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
    llvm/test/Transforms/OpenMP/barrier_removal.ll
    llvm/test/Transforms/OpenMP/spmdization_guarding.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 1ab4458bafcc3..9091fdd5c959f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -977,10 +977,6 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
 }
 
 unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
-  if (auto *Arg = dyn_cast<Argument>(V);
-      Arg && AMDGPU::isKernelCC(Arg->getParent()) && !Arg->hasByRefAttr())
-    return AMDGPUAS::GLOBAL_ADDRESS;
-
   const auto *LD = dyn_cast<LoadInst>(V);
   if (!LD) // TODO: Handle invariant load like constant.
     return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
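
For context, the removed getAssumedAddrSpace special case told InferAddressSpaces to assume that every non-byref pointer kernel argument points to global memory (addrspace(1)). A minimal LLVM IR sketch of the behavior being reverted (the kernel name @flat_kernel_arg is illustrative, not taken from the patch):

define amdgpu_kernel void @flat_kernel_arg(ptr %input, ptr %output) {
  ; Both pointers are flat (addrspace(0)) kernel arguments.
  %val = load i32, ptr %input, align 4
  store i32 %val, ptr %output, align 4
  ret void
}

With the reverted heuristic, opt -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces rewrote both accesses through addrspacecasts to ptr addrspace(1); after this revert, the flat load and store are left alone, as the updated load_store_flat test below shows.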

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3ce03a4b96f61..470c5308edca4 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12592,18 +12592,29 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
     uint32_t OldAddressSpace = AssumedAddressSpace;
 
     auto CheckAddressSpace = [&](Value &Obj) {
       if (isa<UndefValue>(&Obj))
         return true;
+      // If an argument in the flat address space only has addrspacecast uses,
+      // and those casts all have the same destination, take the dst addrspace.
       if (auto *Arg = dyn_cast<Argument>(&Obj)) {
-        auto *TTI =
-            A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
-                *Arg->getParent());
-        unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
-        if (AssumedAS != ~0U)
-          return takeAddressSpace(AssumedAS);
+        if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+          unsigned CastAddrSpace = FlatAS;
+          for (auto *U : Arg->users()) {
+            auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+            if (!ASCI)
+              return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+            if (CastAddrSpace != FlatAS &&
+                CastAddrSpace != ASCI->getDestAddressSpace())
+              return false;
+            CastAddrSpace = ASCI->getDestAddressSpace();
+          }
+          if (CastAddrSpace != FlatAS)
+            return takeAddressSpace(CastAddrSpace);
+        }
       }
       return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
     };
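
The restored Attributor fallback above no longer asks TTI for an assumed address space; it inspects the argument's users directly: if a flat-address-space argument is used only by addrspacecasts that all share one destination address space, that destination becomes the assumed address space. A hedged LLVM IR illustration of the pattern the CheckAddressSpace lambda resolves (names are illustrative; addrspace(0) is the AMDGPU flat address space and addrspace(3) is LDS):

define void @only_cast_uses(ptr %p, i32 %v) {
  ; The sole user of %p is an addrspacecast to addrspace(3).
  %p.lds = addrspacecast ptr %p to ptr addrspace(3)
  store i32 %v, ptr addrspace(3) %p.lds, align 4
  ret void
}

Since every user of %p is an addrspacecast to addrspace(3), the lambda takes addrspace(3) for %p; a non-cast user falls back to the argument's own (flat) address space, and casts with differing destinations make the check fail.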

diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index cc2c80060231c..d1a6414fe49ae 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -246,7 +246,8 @@ define void @foo(ptr addrspace(3) %val) {
 define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
 ; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
 ; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    store i32 [[VAL]], ptr [[P]], align 4
+; CHECK-NEXT:    [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
@@ -258,7 +259,8 @@ define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val)
 define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
 ; CHECK-LABEL: define internal void @use_argument_after_promotion(
 ; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    store i32 [[VAL]], ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
 ; CHECK-NEXT:    ret void
 ;
   store i32 %val, ptr %p

diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
index 60bb38f863e8e..99fe986cf6378 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -66,9 +66,7 @@ define amdgpu_kernel void @store_global_from_flat(ptr %generic_scalar) #0 {
 define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @store_group_from_flat(
 ; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
-; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
-; CHECK-NEXT:    [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT:    [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(3)
 ; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(3) [[_TMP0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -80,9 +78,7 @@ define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
 define amdgpu_kernel void @store_private_from_flat(ptr %generic_scalar) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @store_private_from_flat(
 ; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
-; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
-; CHECK-NEXT:    [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(5)
+; CHECK-NEXT:    [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(5)
 ; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[_TMP0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -140,10 +136,8 @@ define amdgpu_kernel void @load_store_private(ptr addrspace(5) nocapture %input,
 define amdgpu_kernel void @load_store_flat(ptr nocapture %input, ptr nocapture %output) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @load_store_flat(
 ; CHECK-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(1)
-; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
-; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[OUTPUT]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %val = load i32, ptr %input, align 4

diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index 1c317786d1c20..57453d63d7e8a 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -48,8 +48,7 @@ define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.
 define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(
 ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
 ; CHECK-NEXT:    ret void
 ;
   %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -60,8 +59,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
 define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(
 ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
 ; CHECK-NEXT:    ret void
 ;
   %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -72,8 +70,7 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
 define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr %src.ptr, i64 %size) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(
 ; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT:    call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr align 4 [[SRC_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
 ; CHECK-NEXT:    ret void
 ;
   %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
@@ -119,8 +116,7 @@ define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspac
 define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
 ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -131,8 +127,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struc
 define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(
 ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -143,10 +138,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr
 define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest0, ptr %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
 ; CHECK-SAME: ptr [[DEST0:%.*]], ptr [[DEST1:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST0]] to ptr addrspace(1)
-; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[DEST1]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
-; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP2]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST0]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -169,8 +162,7 @@ define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %grou
 define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(
 ; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT:    call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT:    call void @llvm.memmove.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
 ; CHECK-NEXT:    ret void
 ;
   %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr

diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 56f730ccb4189..f662d5dd85b2b 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -682,18 +682,11 @@ m:
 }
 
 define internal void @write_then_barrier0(ptr %p) {
-; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0
-; MODULE-SAME: (ptr [[P:%.*]]) {
-; MODULE-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
-; MODULE-NEXT:    store i32 0, ptr addrspace(1) [[TMP1]], align 4
-; MODULE-NEXT:    call void @aligned_barrier()
-; MODULE-NEXT:    ret void
-;
-; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0
-; CGSCC-SAME: (ptr [[P:%.*]]) {
-; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
-; CGSCC-NEXT:    call void @aligned_barrier()
-; CGSCC-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
 ;
   store i32 0, ptr %p
   call void @aligned_barrier()
@@ -702,8 +695,7 @@ define internal void @write_then_barrier0(ptr %p) {
 define internal void @barrier_then_write0(ptr %p) {
 ; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
 ; MODULE-SAME: (ptr [[P:%.*]]) {
-; MODULE-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
-; MODULE-NEXT:    store i32 0, ptr addrspace(1) [[TMP1]], align 4
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
 ; MODULE-NEXT:    ret void
 ;
 ; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
@@ -719,8 +711,7 @@ define internal void @barrier_then_write0(ptr %p) {
 define internal void @barrier_then_write_then_barrier0(ptr %p) {
 ; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
 ; MODULE-SAME: (ptr [[P:%.*]]) {
-; MODULE-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
-; MODULE-NEXT:    store i32 0, ptr addrspace(1) [[TMP1]], align 4
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
 ; MODULE-NEXT:    call void @aligned_barrier()
 ; MODULE-NEXT:    ret void
 ;

diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 81e11e048dfd0..2f1aadc073142 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -85,10 +85,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
 ; CHECK-NEXT:    store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
-; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
@@ -109,17 +107,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID5:%.*]]
 ; CHECK:       region.check.tid5:
-; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[TMP7]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
 ; CHECK:       region.guarded4:
-; CHECK-NEXT:    [[TMP8:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 [[SUB3_I]], ptr addrspace(1) [[TMP8]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END1:%.*]]
 ; CHECK:       region.guarded.end1:
 ; CHECK-NEXT:    br label [[REGION_BARRIER2]]
 ; CHECK:       region.barrier2:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4]])
 ; CHECK-NEXT:    br label [[REGION_EXIT3]]
 ; CHECK:       region.exit3:
 ; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -131,17 +128,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID10:%.*]]
 ; CHECK:       region.check.tid10:
-; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
 ; CHECK:       region.guarded9:
-; CHECK-NEXT:    [[TMP11:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 [[CALL_I]], ptr addrspace(1) [[TMP11]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END6:%.*]]
 ; CHECK:       region.guarded.end6:
 ; CHECK-NEXT:    br label [[REGION_BARRIER7]]
 ; CHECK:       region.barrier7:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP9]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
 ; CHECK-NEXT:    br label [[REGION_EXIT8:%.*]]
 ; CHECK:       region.exit8:
 ; CHECK-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -149,17 +145,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-NEXT:    [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID15:%.*]]
 ; CHECK:       region.check.tid15:
-; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
-; CHECK-NEXT:    br i1 [[TMP13]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
 ; CHECK:       region.guarded14:
-; CHECK-NEXT:    [[TMP14:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 [[CALL8_I]], ptr addrspace(1) [[TMP14]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END11:%.*]]
 ; CHECK:       region.guarded.end11:
 ; CHECK-NEXT:    br label [[REGION_BARRIER12]]
 ; CHECK:       region.barrier12:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
 ; CHECK-NEXT:    br label [[REGION_EXIT13:%.*]]
 ; CHECK:       region.exit13:
 ; CHECK-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -167,17 +162,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-NEXT:    [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID20:%.*]]
 ; CHECK:       region.check.tid20:
-; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
-; CHECK-NEXT:    br i1 [[TMP16]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
 ; CHECK:       region.guarded19:
-; CHECK-NEXT:    [[TMP17:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
-; CHECK-NEXT:    store i32 [[CALL11_I]], ptr addrspace(1) [[TMP17]], align 4, !noalias [[META7]]
+; CHECK-NEXT:    store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END16:%.*]]
 ; CHECK:       region.guarded.end16:
 ; CHECK-NEXT:    br label [[REGION_BARRIER17]]
 ; CHECK:       region.barrier17:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP15]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
 ; CHECK-NEXT:    br label [[REGION_EXIT18:%.*]]
 ; CHECK:       region.exit18:
 ; CHECK-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -238,13 +232,11 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
 ; CHECK-DISABLED-NEXT:    store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-DISABLED-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT:    store i32 1, ptr addrspace(1) [[TMP2]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT:    store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[SEXT:%.*]] = shl i64 [[N]], 32
 ; CHECK-DISABLED-NEXT:    [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
-; CHECK-DISABLED-NEXT:    [[TMP3:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP3]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
 ; CHECK-DISABLED-NEXT:    br label [[FOR_COND_I:%.*]]
 ; CHECK-DISABLED:       for.cond.i:
@@ -256,8 +248,7 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-DISABLED-NEXT:    [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
 ; CHECK-DISABLED-NEXT:    [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
-; CHECK-DISABLED-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT:    store i32 [[SUB3_I]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT:    store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
 ; CHECK-DISABLED-NEXT:    br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-DISABLED:       __omp_outlined__.exit:
@@ -265,18 +256,15 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
 ; CHECK-DISABLED-NEXT:    [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
-; CHECK-DISABLED-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT:    store i32 [[CALL_I]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT:    store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
-; CHECK-DISABLED-NEXT:    [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT:    store i32 [[CALL8_I]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT:    store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
-; CHECK-DISABLED-NEXT:    [[TMP7:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT:    store i32 [[CALL11_I]], ptr addrspace(1) [[TMP7]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT:    store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
 ; CHECK-DISABLED-NEXT:    [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]


        

