[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Sep 18 11:49:15 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713
>From fb2ed73b44facf865312d7efe32053718fcd6458 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 12 Sep 2024 15:25:43 -0400
Subject: [PATCH] [Attributor] Use more appropriate approach to check flat
address space
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 7 ++---
.../Transforms/IPO/AttributorAttributes.cpp | 26 ++++++++++++++-----
.../CodeGen/AMDGPU/simple-indirect-call.ll | 5 ++--
.../Attributor/address_space_info.ll | 4 ++-
.../Attributor/memory_locations_gpu.ll | 8 +++---
.../test/Transforms/Attributor/nocapture-1.ll | 4 +--
.../reduced/openmp_opt_constant_type_crash.ll | 1 -
.../Transforms/Attributor/value-simplify.ll | 3 +--
.../Transforms/OpenMP/nested_parallelism.ll | 4 +--
.../OpenMP/spmdization_kernel_env_dep.ll | 25 +++++++++---------
10 files changed, 51 insertions(+), 36 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 921fe945539510..59bae547522ea7 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6267,11 +6267,12 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
return (AA->getIdAddr() == &ID);
}
- // No address space which indicates the associated value is dead.
- static const uint32_t NoAddressSpace = ~0U;
-
/// Unique ID (due to the unique address)
static const char ID;
+
+protected:
+ // Invalid address space which indicates the associated value is dead.
+ static const uint32_t InvalidAddressSpace = ~0U;
};
struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..b2888f556d7d0d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
void initialize(Attributor &A) override {
assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
"Associated value is not a pointer");
- if (getAssociatedType()->getPointerAddressSpace())
+
+ if (!A.getInfoCache().getDL().getFlatAddressSpace().has_value()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value();
+ unsigned AS = getAssociatedType()->getPointerAddressSpace();
+ if (AS != FlatAS) {
+ [[maybe_unused]] bool R = takeAddressSpace(AS);
+ assert(R && "The take should happen");
indicateOptimisticFixpoint();
+ }
}
ChangeStatus updateImpl(Attributor &A) override {
@@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- Value *AssociatedValue = &getAssociatedValue();
- Value *OriginalValue = peelAddrspacecast(AssociatedValue);
- if (getAddressSpace() == NoAddressSpace ||
+ if (getAddressSpace() == InvalidAddressSpace ||
getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
return ChangeStatus::UNCHANGED;
+ Value *AssociatedValue = &getAssociatedValue();
+ Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+
PointerType *NewPtrTy =
PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
bool UseOriginalValue =
@@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
if (!isValidState())
return "addrspace(<invalid>)";
return "addrspace(" +
- (AssumedAddressSpace == NoAddressSpace
+ (AssumedAddressSpace == InvalidAddressSpace
? "none"
: std::to_string(AssumedAddressSpace)) +
")";
}
private:
- uint32_t AssumedAddressSpace = NoAddressSpace;
+ uint32_t AssumedAddressSpace = InvalidAddressSpace;
bool takeAddressSpace(uint32_t AS) {
- if (AssumedAddressSpace == NoAddressSpace) {
+ if (AssumedAddressSpace == InvalidAddressSpace) {
AssumedAddressSpace = AS;
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index cca7b49996ff3b..971161a1c59855 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -34,8 +34,9 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll
index 73dd93c55b819b..0c8b06ac6666a4 100644
--- a/llvm/test/Transforms/Attributor/address_space_info.ll
+++ b/llvm/test/Transforms/Attributor/address_space_info.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefix=CHECK
+
+; REQUIRES: amdgpu-registered-target
@dst = dso_local addrspace(1) externally_initialized global i32 0, align 4
@g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4
diff --git a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
index c10883b54ad591..db4647232c882e 100644
--- a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
+++ b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
@@ -29,7 +29,7 @@ define i32 @test_const_as_global2() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@test_const_as_global2
; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(4) @G, align 4
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr), align 4
; CHECK-NEXT: ret i32 [[L2]]
;
%l2 = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr)
@@ -41,7 +41,8 @@ define i32 @test_const_as_call1() {
; CHECK-LABEL: define {{[^@]+}}@test_const_as_call1
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(4) @ptr_to_const() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(4) [[P1]], align 4
+; CHECK-NEXT: [[C1:%.*]] = addrspacecast ptr addrspace(4) [[P1]] to ptr
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[C1]], align 4
; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = call ptr addrspace(4) @ptr_to_const()
@@ -71,7 +72,8 @@ define i32 @test_shared_as_call1() {
; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call1
; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(3) @ptr_to_shared() #[[ATTR4]]
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(3) [[P1]], align 4
+; CHECK-NEXT: [[C1:%.*]] = addrspacecast ptr addrspace(3) [[P1]] to ptr
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[C1]], align 4
; CHECK-NEXT: ret i32 [[L1]]
;
%p1 = call ptr addrspace(3) @ptr_to_shared()
diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll
index 3401ddfdd7d758..de5f31e470edfc 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocapture-1.ll
@@ -257,7 +257,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) {
; TUNIT-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
; TUNIT-NEXT: [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]]
; TUNIT-NEXT: [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4
-; TUNIT-NEXT: store i32 0, ptr addrspace(1) [[P]], align 4
+; TUNIT-NEXT: store i32 0, ptr [[TMP]], align 4
; TUNIT-NEXT: store ptr [[Q]], ptr @g, align 8
; TUNIT-NEXT: ret i32 [[VAL]]
;
@@ -272,7 +272,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) {
; CGSCC-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
; CGSCC-NEXT: [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]]
; CGSCC-NEXT: [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4
-; CGSCC-NEXT: store i32 0, ptr addrspace(1) [[P]], align 4
+; CGSCC-NEXT: store i32 0, ptr [[TMP]], align 4
; CGSCC-NEXT: store ptr [[Q]], ptr @g, align 8
; CGSCC-NEXT: ret i32 [[VAL]]
;
diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
index 518ed97f42bc10..fda72a6e31a0c7 100644
--- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
+++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
@@ -116,7 +116,6 @@ cond.end: ; preds = %cond.true, %entry
; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized
; CHECK-SAME: (ptr nocapture writeonly [[THIS:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5)
; CHECK-NEXT: ret i1 false
;
;
diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index 68f179c88116e4..a5789790cc92a1 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -838,8 +838,7 @@ define void @user() {
; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
; TUNIT-LABEL: define {{[^@]+}}@user
; TUNIT-SAME: () #[[ATTR5]] {
-; TUNIT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr) to ptr addrspace(3)
-; TUNIT-NEXT: store i32 0, ptr addrspace(3) [[TMP1]], align 4
+; TUNIT-NEXT: store i32 0, ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr), align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write)
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 5c4386b24a3d5a..4f4a87cbddfec1 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -60,7 +60,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn,
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
; CHECK: region.guarded.i:
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
-; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16
+; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
; CHECK: _Z3fooi.internalized.exit:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
@@ -140,7 +140,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn,
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
+; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
index 52be16c41f872d..ce7b4f89b893ff 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
@@ -27,22 +27,21 @@ define i32 @fputs() {
define internal i32 @__kmpc_target_init(ptr %0, ptr %dyn) {
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[DYN:%.*]]) #[[ATTR1:[0-9]+]] {
-; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2) to ptr addrspace(1)
-; AMDGPU-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 2
-; AMDGPU-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 2
-; AMDGPU-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; AMDGPU-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false
-; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP8:%.*]], label [[TMP9:%.*]]
-; AMDGPU: 8:
+; AMDGPU-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2
+; AMDGPU-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 2
+; AMDGPU-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
+; AMDGPU-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false
+; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]]
+; AMDGPU: 7:
; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648
-; AMDGPU-NEXT: br label [[TMP9]]
+; AMDGPU-NEXT: br label [[TMP8]]
+; AMDGPU: 8:
+; AMDGPU-NEXT: br label [[TMP10:%.*]]
; AMDGPU: 9:
-; AMDGPU-NEXT: br label [[TMP11:%.*]]
-; AMDGPU: 10:
; AMDGPU-NEXT: unreachable
-; AMDGPU: 11:
+; AMDGPU: 10:
; AMDGPU-NEXT: ret i32 0
;
%2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2
More information about the llvm-branch-commits mailing list