[llvm] 37ea3b3 - Revert "Reapply "[AMDGPU] Make `getAssumedAddrSpace` return AS1 for pointer kernel arguments (#137488)""
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 19:06:21 PDT 2025
Author: Shilei Tian
Date: 2025-05-30T22:06:16-04:00
New Revision: 37ea3b32cdcb6c0dcecbcc4bf844f5190c7378dd
URL: https://github.com/llvm/llvm-project/commit/37ea3b32cdcb6c0dcecbcc4bf844f5190c7378dd
DIFF: https://github.com/llvm/llvm-project/commit/37ea3b32cdcb6c0dcecbcc4bf844f5190c7378dd.diff
LOG: Revert "Reapply "[AMDGPU] Make `getAssumedAddrSpace` return AS1 for pointer kernel arguments (#137488)""
This reverts commit 4efc13f8ff1eaf4f9fb1fcea8d4552b3eca052ca.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
llvm/test/Transforms/OpenMP/barrier_removal.ll
llvm/test/Transforms/OpenMP/spmdization_guarding.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 1ab4458bafcc3..9091fdd5c959f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -977,10 +977,6 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
}
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
- if (auto *Arg = dyn_cast<Argument>(V);
- Arg && AMDGPU::isKernelCC(Arg->getParent()) && !Arg->hasByRefAttr())
- return AMDGPUAS::GLOBAL_ADDRESS;
-
const auto *LD = dyn_cast<LoadInst>(V);
if (!LD) // TODO: Handle invariant load like constant.
return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
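For readers skimming the thread, the hunk above is the TargetMachine half of the revert: it drops the assumption from #137488 that a non-byref pointer argument of a kernel lives in the global address space. Below is a minimal standalone sketch of that removed heuristic in plain C++; the ArgInfo struct and the two constants are stand-ins for llvm::Argument, AMDGPU::isKernelCC, and the AMDGPUAS enumerators, not the real API.

#include <iostream>

// Stand-ins for AMDGPUAS::GLOBAL_ADDRESS and AMDGPUAS::UNKNOWN_ADDRESS_SPACE.
constexpr unsigned GlobalAS = 1;
constexpr unsigned UnknownAS = ~0u;

// Stand-in for the properties the removed code queried on the argument.
struct ArgInfo {
  bool IsKernelArg; // parent function has a kernel calling convention
  bool HasByRef;    // argument carries the byref attribute
};

// The removed heuristic: a non-byref pointer argument of a kernel was assumed
// to point into the global address space (AS1). Anything else fell through to
// the existing load-based logic, modeled here simply as "unknown".
unsigned assumedAddrSpace(const ArgInfo &Arg) {
  if (Arg.IsKernelArg && !Arg.HasByRef)
    return GlobalAS;
  return UnknownAS;
}

int main() {
  std::cout << assumedAddrSpace({true, false}) << '\n';               // 1
  std::cout << (assumedAddrSpace({true, true}) == UnknownAS) << '\n'; // 1 (unknown)
}

With the heuristic gone, the test updates further down (basic.ll, mem-intrinsics.ll, and the OpenMP tests) no longer expect the addrspacecast-to-addrspace(1) rewrites it used to enable.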
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3ce03a4b96f61..470c5308edca4 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12592,18 +12592,29 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
}
ChangeStatus updateImpl(Attributor &A) override {
+ unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
uint32_t OldAddressSpace = AssumedAddressSpace;
auto CheckAddressSpace = [&](Value &Obj) {
if (isa<UndefValue>(&Obj))
return true;
+ // If an argument in flat address space only has addrspace cast uses, and
+ // those casts are same, then we take the dst addrspace.
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
- auto *TTI =
- A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
- *Arg->getParent());
- unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
- if (AssumedAS != ~0U)
- return takeAddressSpace(AssumedAS);
+ if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+ unsigned CastAddrSpace = FlatAS;
+ for (auto *U : Arg->users()) {
+ auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+ if (!ASCI)
+ return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+ if (CastAddrSpace != FlatAS &&
+ CastAddrSpace != ASCI->getDestAddressSpace())
+ return false;
+ CastAddrSpace = ASCI->getDestAddressSpace();
+ }
+ if (CastAddrSpace != FlatAS)
+ return takeAddressSpace(CastAddrSpace);
+ }
}
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
};
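The Attributor half above restores the original in-tree use walk in AAAddressSpaceImpl::updateImpl instead of asking TTI::getAssumedAddrSpace. Here is a minimal standalone model of that walk, in plain C++ rather than the Attributor API; each entry of Uses abstracts one user of the argument (the destination address space of an addrspacecast, or nothing for any other kind of use), and FlatAS = 0 matches AMDGPU's flat address space.

#include <iostream>
#include <optional>
#include <vector>

constexpr unsigned FlatAS = 0; // flat address space on AMDGPU

// Mirrors the restored CheckAddressSpace lambda for a flat-address-space
// argument: if every user is an addrspacecast and all casts agree on one
// destination, that destination is taken; a non-cast user falls back to the
// argument's own (flat) address space; disagreeing casts yield no answer.
std::optional<unsigned>
inferArgAddrSpace(const std::vector<std::optional<unsigned>> &Uses) {
  unsigned CastAS = FlatAS;
  for (const std::optional<unsigned> &U : Uses) {
    if (!U)
      return FlatAS;       // a non-addrspacecast use: keep the flat space
    if (CastAS != FlatAS && CastAS != *U)
      return std::nullopt; // two casts target different spaces: give up
    CastAS = *U;
  }
  if (CastAS != FlatAS)
    return CastAS;         // every cast agreed on one destination space
  return FlatAS;           // no users at all: nothing to refine
}

int main() {
  // Only addrspacecast-to-addrspace(1) users, as in the
  // kernel_argument_promotion_pattern_intra_procedure test below: take AS1.
  std::cout << *inferArgAddrSpace({1u, 1u}) << '\n';             // 1
  // A direct store through the pointer blocks refinement: stay flat.
  std::cout << *inferArgAddrSpace({std::nullopt}) << '\n';       // 0
  // Casts to addrspace(1) and addrspace(3) disagree: no assumption.
  std::cout << inferArgAddrSpace({1u, 3u}).has_value() << '\n';  // 0
}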
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index cc2c80060231c..d1a6414fe49ae 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -246,7 +246,8 @@ define void @foo(ptr addrspace(3) %val) {
define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
+; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
; CHECK-NEXT: ret void
;
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
@@ -258,7 +259,8 @@ define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val)
define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
; CHECK-LABEL: define internal void @use_argument_after_promotion(
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: store i32 [[VAL]], ptr [[P]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
; CHECK-NEXT: ret void
;
store i32 %val, ptr %p
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
index 60bb38f863e8e..99fe986cf6378 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -66,9 +66,7 @@ define amdgpu_kernel void @store_global_from_flat(ptr %generic_scalar) #0 {
define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
; CHECK-LABEL: define amdgpu_kernel void @store_group_from_flat(
; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
-; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(3)
+; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(3)
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[_TMP0]], align 4
; CHECK-NEXT: ret void
;
@@ -80,9 +78,7 @@ define amdgpu_kernel void @store_group_from_flat(ptr %generic_scalar) #0 {
define amdgpu_kernel void @store_private_from_flat(ptr %generic_scalar) #0 {
; CHECK-LABEL: define amdgpu_kernel void @store_private_from_flat(
; CHECK-SAME: ptr [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(1)
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr
-; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[TMP2]] to ptr addrspace(5)
+; CHECK-NEXT: [[_TMP0:%.*]] = addrspacecast ptr [[GENERIC_SCALAR]] to ptr addrspace(5)
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[_TMP0]], align 4
; CHECK-NEXT: ret void
;
@@ -140,10 +136,8 @@ define amdgpu_kernel void @load_store_private(ptr addrspace(5) nocapture %input,
define amdgpu_kernel void @load_store_flat(ptr nocapture %input, ptr nocapture %output) #0 {
; CHECK-LABEL: define amdgpu_kernel void @load_store_flat(
; CHECK-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(1)
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
-; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4
+; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4
; CHECK-NEXT: ret void
;
%val = load i32, ptr %input, align 4
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index 1c317786d1c20..57453d63d7e8a 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -48,8 +48,7 @@ define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -60,8 +59,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -72,8 +70,7 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr %src.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(
; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr align 4 [[SRC_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr
@@ -119,8 +116,7 @@ define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspac
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -131,8 +127,7 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struc
define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -143,10 +138,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr
define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr %dest0, ptr %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK-SAME: ptr [[DEST0:%.*]], ptr [[DEST1:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST0]] to ptr addrspace(1)
-; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DEST1]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP2]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST0]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
@@ -169,8 +162,7 @@ define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %grou
define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 {
; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(
; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memmove.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]]
; CHECK-NEXT: ret void
;
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 56f730ccb4189..f662d5dd85b2b 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -682,18 +682,11 @@ m:
}
define internal void @write_then_barrier0(ptr %p) {
-; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0
-; MODULE-SAME: (ptr [[P:%.*]]) {
-; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
-; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
-; MODULE-NEXT: call void @aligned_barrier()
-; MODULE-NEXT: ret void
-;
-; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0
-; CGSCC-SAME: (ptr [[P:%.*]]) {
-; CGSCC-NEXT: store i32 0, ptr [[P]], align 4
-; CGSCC-NEXT: call void @aligned_barrier()
-; CGSCC-NEXT: ret void
+; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: call void @aligned_barrier()
+; CHECK-NEXT: ret void
;
store i32 0, ptr %p
call void @aligned_barrier()
@@ -702,8 +695,7 @@ define internal void @write_then_barrier0(ptr %p) {
define internal void @barrier_then_write0(ptr %p) {
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
; MODULE-SAME: (ptr [[P:%.*]]) {
-; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
-; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
+; MODULE-NEXT: store i32 0, ptr [[P]], align 4
; MODULE-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
@@ -719,8 +711,7 @@ define internal void @barrier_then_write0(ptr %p) {
define internal void @barrier_then_write_then_barrier0(ptr %p) {
; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
; MODULE-SAME: (ptr [[P:%.*]]) {
-; MODULE-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
-; MODULE-NEXT: store i32 0, ptr addrspace(1) [[TMP1]], align 4
+; MODULE-NEXT: store i32 0, ptr [[P]], align 4
; MODULE-NEXT: call void @aligned_barrier()
; MODULE-NEXT: ret void
;
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 81e11e048dfd0..2f1aadc073142 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -85,10 +85,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -109,17 +107,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]]
; CHECK: region.check.tid5:
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
; CHECK: region.guarded4:
-; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP8]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
; CHECK: region.guarded.end1:
; CHECK-NEXT: br label [[REGION_BARRIER2]]
; CHECK: region.barrier2:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4]])
; CHECK-NEXT: br label [[REGION_EXIT3]]
; CHECK: region.exit3:
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -131,17 +128,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
; CHECK: region.check.tid10:
-; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[TMP10]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
; CHECK: region.guarded9:
-; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP11]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
; CHECK: region.guarded.end6:
; CHECK-NEXT: br label [[REGION_BARRIER7]]
; CHECK: region.barrier7:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP9]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
; CHECK: region.exit8:
; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -149,17 +145,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
; CHECK: region.check.tid15:
-; CHECK-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
; CHECK: region.guarded14:
-; CHECK-NEXT: [[TMP14:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP14]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
; CHECK: region.guarded.end11:
; CHECK-NEXT: br label [[REGION_BARRIER12]]
; CHECK: region.barrier12:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
; CHECK: region.exit13:
; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -167,17 +162,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
; CHECK: region.check.tid20:
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[TMP16]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
; CHECK: region.guarded19:
-; CHECK-NEXT: [[TMP17:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP17]], align 4, !noalias [[META7]]
+; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
; CHECK: region.guarded.end16:
; CHECK-NEXT: br label [[REGION_BARRIER17]]
; CHECK: region.barrier17:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP15]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
; CHECK: region.exit18:
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -238,13 +232,11 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
-; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT: store i32 1, ptr addrspace(1) [[TMP2]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
-; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP3]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK-DISABLED: for.cond.i:
@@ -256,8 +248,7 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
-; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-DISABLED: __omp_outlined__.exit:
@@ -265,18 +256,15 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
-; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
-; CHECK-DISABLED-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
-; CHECK-DISABLED-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
-; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP7]], align 4, !noalias [[META7]]
+; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]