[llvm-branch-commits] [llvm] [Attributor] Use more appropriate approach to check flat address space (PR #108713)

Shilei Tian via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Sep 18 11:49:15 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108713

>From fb2ed73b44facf865312d7efe32053718fcd6458 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 12 Sep 2024 15:25:43 -0400
Subject: [PATCH] [Attributor] Use more appropriate approach to check flat
 address space

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  7 ++---
 .../Transforms/IPO/AttributorAttributes.cpp   | 26 ++++++++++++++-----
 .../CodeGen/AMDGPU/simple-indirect-call.ll    |  5 ++--
 .../Attributor/address_space_info.ll          |  4 ++-
 .../Attributor/memory_locations_gpu.ll        |  8 +++---
 .../test/Transforms/Attributor/nocapture-1.ll |  4 +--
 .../reduced/openmp_opt_constant_type_crash.ll |  1 -
 .../Transforms/Attributor/value-simplify.ll   |  3 +--
 .../Transforms/OpenMP/nested_parallelism.ll   |  4 +--
 .../OpenMP/spmdization_kernel_env_dep.ll      | 25 +++++++++---------
 10 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 921fe945539510..59bae547522ea7 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6267,11 +6267,12 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
     return (AA->getIdAddr() == &ID);
   }
 
-  // No address space which indicates the associated value is dead.
-  static const uint32_t NoAddressSpace = ~0U;
-
   /// Unique ID (due to the unique address)
   static const char ID;
+
+protected:
+  // Invalid address space which indicates the associated value is dead.
+  static const uint32_t InvalidAddressSpace = ~0U;
 };
 
 struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..b2888f556d7d0d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
     assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
            "Associated value is not a pointer");
-    if (getAssociatedType()->getPointerAddressSpace())
+
+    if (!A.getInfoCache().getDL().getFlatAddressSpace().has_value()) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    unsigned FlatAS = A.getInfoCache().getDL().getFlatAddressSpace().value();
+    unsigned AS = getAssociatedType()->getPointerAddressSpace();
+    if (AS != FlatAS) {
+      [[maybe_unused]] bool R = takeAddressSpace(AS);
+      assert(R && "The take should happen");
       indicateOptimisticFixpoint();
+    }
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12594,12 +12605,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-    Value *AssociatedValue = &getAssociatedValue();
-    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-    if (getAddressSpace() == NoAddressSpace ||
+    if (getAddressSpace() == InvalidAddressSpace ||
         getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
       return ChangeStatus::UNCHANGED;
 
+    Value *AssociatedValue = &getAssociatedValue();
+    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+
     PointerType *NewPtrTy =
         PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
     bool UseOriginalValue =
@@ -12646,17 +12658,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
     if (!isValidState())
       return "addrspace(<invalid>)";
     return "addrspace(" +
-           (AssumedAddressSpace == NoAddressSpace
+           (AssumedAddressSpace == InvalidAddressSpace
                 ? "none"
                 : std::to_string(AssumedAddressSpace)) +
            ")";
   }
 
 private:
-  uint32_t AssumedAddressSpace = NoAddressSpace;
+  uint32_t AssumedAddressSpace = InvalidAddressSpace;
 
   bool takeAddressSpace(uint32_t AS) {
-    if (AssumedAddressSpace == NoAddressSpace) {
+    if (AssumedAddressSpace == InvalidAddressSpace) {
       AssumedAddressSpace = AS;
       return true;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index cca7b49996ff3b..971161a1c59855 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -34,8 +34,9 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
 ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_GCN-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
 ; ATTRIBUTOR_GCN-NEXT:    call void @indirect()
 ; ATTRIBUTOR_GCN-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll
index 73dd93c55b819b..0c8b06ac6666a4 100644
--- a/llvm/test/Transforms/Attributor/address_space_info.ll
+++ b/llvm/test/Transforms/Attributor/address_space_info.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefix=CHECK
+
+; REQUIRES: amdgpu-registered-target
 
 @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4
 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4
diff --git a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
index c10883b54ad591..db4647232c882e 100644
--- a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
+++ b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
@@ -29,7 +29,7 @@ define i32 @test_const_as_global2() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define {{[^@]+}}@test_const_as_global2
 ; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr addrspace(4) @G, align 4
+; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr), align 4
 ; CHECK-NEXT:    ret i32 [[L2]]
 ;
   %l2 = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr)
@@ -41,7 +41,8 @@ define i32 @test_const_as_call1() {
 ; CHECK-LABEL: define {{[^@]+}}@test_const_as_call1
 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:    [[P1:%.*]] = call ptr addrspace(4) @ptr_to_const() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr addrspace(4) [[P1]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = addrspacecast ptr addrspace(4) [[P1]] to ptr
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[C1]], align 4
 ; CHECK-NEXT:    ret i32 [[L1]]
 ;
   %p1 = call ptr addrspace(4) @ptr_to_const()
@@ -71,7 +72,8 @@ define i32 @test_shared_as_call1() {
 ; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call1
 ; CHECK-SAME: () #[[ATTR2]] {
 ; CHECK-NEXT:    [[P1:%.*]] = call ptr addrspace(3) @ptr_to_shared() #[[ATTR4]]
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr addrspace(3) [[P1]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = addrspacecast ptr addrspace(3) [[P1]] to ptr
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[C1]], align 4
 ; CHECK-NEXT:    ret i32 [[L1]]
 ;
   %p1 = call ptr addrspace(3) @ptr_to_shared()
diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll
index 3401ddfdd7d758..de5f31e470edfc 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocapture-1.ll
@@ -257,7 +257,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) {
 ; TUNIT-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
 ; TUNIT-NEXT:    [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]]
 ; TUNIT-NEXT:    [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4
-; TUNIT-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
+; TUNIT-NEXT:    store i32 0, ptr [[TMP]], align 4
 ; TUNIT-NEXT:    store ptr [[Q]], ptr @g, align 8
 ; TUNIT-NEXT:    ret i32 [[VAL]]
 ;
@@ -272,7 +272,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) {
 ; CGSCC-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
 ; CGSCC-NEXT:    [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]]
 ; CGSCC-NEXT:    [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4
-; CGSCC-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
+; CGSCC-NEXT:    store i32 0, ptr [[TMP]], align 4
 ; CGSCC-NEXT:    store ptr [[Q]], ptr @g, align 8
 ; CGSCC-NEXT:    ret i32 [[VAL]]
 ;
diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
index 518ed97f42bc10..fda72a6e31a0c7 100644
--- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
+++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
@@ -116,7 +116,6 @@ cond.end:                                         ; preds = %cond.true, %entry
 ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized
 ; CHECK-SAME: (ptr nocapture writeonly [[THIS:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5)
 ; CHECK-NEXT:    ret i1 false
 ;
 ;
diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index 68f179c88116e4..a5789790cc92a1 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -838,8 +838,7 @@ define void @user() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
 ; TUNIT-LABEL: define {{[^@]+}}@user
 ; TUNIT-SAME: () #[[ATTR5]] {
-; TUNIT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr) to ptr addrspace(3)
-; TUNIT-NEXT:    store i32 0, ptr addrspace(3) [[TMP1]], align 4
+; TUNIT-NEXT:    store i32 0, ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr), align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write)
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 5c4386b24a3d5a..4f4a87cbddfec1 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -60,7 +60,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn,
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
 ; CHECK:       region.guarded.i:
 ; CHECK-NEXT:    [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
-; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16
+; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16
 ; CHECK-NEXT:    br label [[_Z3FOOI_INTERNALIZED_EXIT]]
 ; CHECK:       _Z3fooi.internalized.exit:
 ; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
@@ -140,7 +140,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn,
 ; CHECK-NEXT:    [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
+; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
 ; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
index 52be16c41f872d..ce7b4f89b893ff 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
@@ -27,22 +27,21 @@ define i32 @fputs() {
 define internal i32 @__kmpc_target_init(ptr %0, ptr %dyn) {
 ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
 ; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[DYN:%.*]]) #[[ATTR1:[0-9]+]] {
-; AMDGPU-NEXT:    [[TMP2:%.*]] = addrspacecast ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2) to ptr addrspace(1)
-; AMDGPU-NEXT:    [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 2
-; AMDGPU-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 2
-; AMDGPU-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; AMDGPU-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; AMDGPU-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false
-; AMDGPU-NEXT:    br i1 [[OR_COND]], label [[TMP8:%.*]], label [[TMP9:%.*]]
-; AMDGPU:       8:
+; AMDGPU-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2
+; AMDGPU-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 2
+; AMDGPU-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
+; AMDGPU-NEXT:    [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; AMDGPU-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false
+; AMDGPU-NEXT:    br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]]
+; AMDGPU:       7:
 ; AMDGPU-NEXT:    store i8 0, ptr addrspace(3) null, align 2147483648
-; AMDGPU-NEXT:    br label [[TMP9]]
+; AMDGPU-NEXT:    br label [[TMP8]]
+; AMDGPU:       8:
+; AMDGPU-NEXT:    br label [[TMP10:%.*]]
 ; AMDGPU:       9:
-; AMDGPU-NEXT:    br label [[TMP11:%.*]]
-; AMDGPU:       10:
 ; AMDGPU-NEXT:    unreachable
-; AMDGPU:       11:
+; AMDGPU:       10:
 ; AMDGPU-NEXT:    ret i32 0
 ;
   %2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2



More information about the llvm-branch-commits mailing list