[llvm] [Attributor] Enable `AAAddressSpace` in `OpenMPOpt` (PR #104363)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 20:44:37 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/104363

>From 2e1f5f1f3cf5f6e25c6a643cb2b995811f8136c0 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 14 Aug 2024 21:38:10 -0400
Subject: [PATCH] Reapply "[Attributor] Enable AAAddressSpace for OpenMPOpt
 (#65544)"

This reverts commit e592c2dcf5b7d2da6c2564f5d9990aa34079bad4.
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         |  4 +++
 .../reduced/openmp_opt_constant_type_crash.ll |  1 +
 .../test/Transforms/OpenMP/barrier_removal.ll | 16 ++++++------
 .../heap-to-shared-missing-declarations.ll    |  2 +-
 .../Transforms/OpenMP/nested_parallelism.ll   |  4 +--
 .../OpenMP/spmdization_kernel_env_dep.ll      | 25 ++++++++++---------
 6 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 798bcb77328fc8..baf39c908706c6 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5569,6 +5569,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
       bool UsedAssumedInformation = false;
       A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
                              UsedAssumedInformation, AA::Interprocedural);
+      A.getOrCreateAAFor<AAAddressSpace>(
+          IRPosition::value(*LI->getPointerOperand()));
       continue;
     }
     if (auto *CI = dyn_cast<CallBase>(&I)) {
@@ -5578,6 +5580,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
     }
     if (auto *SI = dyn_cast<StoreInst>(&I)) {
       A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
+      A.getOrCreateAAFor<AAAddressSpace>(
+          IRPosition::value(*SI->getPointerOperand()));
       continue;
     }
     if (auto *FI = dyn_cast<FenceInst>(&I)) {
diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
index fda72a6e31a0c7..518ed97f42bc10 100644
--- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
+++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll
@@ -116,6 +116,7 @@ cond.end:                                         ; preds = %cond.true, %entry
 ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized
 ; CHECK-SAME: (ptr nocapture writeonly [[THIS:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5)
 ; CHECK-NEXT:    ret i1 false
 ;
 ;
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 0925418cf54c72..47a5d5104aa8bd 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -269,10 +269,9 @@ define void @neg_empty_2() "kernel" {
 define void @pos_constant_loads() "kernel" {
 ; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads
 ; CHECK-SAME: () #[[ATTR4]] {
-; CHECK-NEXT:    [[ARG:%.*]] = load ptr addrspace(4), ptr addrspacecast (ptr addrspace(4) @GPtr4 to ptr), align 8
-; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4
-; CHECK-NEXT:    [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = load i32, ptr [[ARGC]], align 4
+; CHECK-NEXT:    [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(4) @GC2, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, ptr addrspace(4) [[ARG]], align 4
 ; CHECK-NEXT:    [[D:%.*]] = add i32 42, [[B]]
 ; CHECK-NEXT:    [[E:%.*]] = add i32 [[D]], [[C]]
 ; CHECK-NEXT:    call void @useI32(i32 [[E]])
@@ -303,7 +302,7 @@ define void @neg_loads() "kernel" {
 ; CHECK-NEXT:    [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @G, align 4
 ; CHECK-NEXT:    call void @aligned_barrier()
-; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @GS to ptr), align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(3) @GS, align 4
 ; CHECK-NEXT:    call void @aligned_barrier()
 ; CHECK-NEXT:    [[C:%.*]] = load i32, ptr [[ARG]], align 4
 ; CHECK-NEXT:    call void @aligned_barrier()
@@ -335,9 +334,8 @@ define void @pos_priv_mem() "kernel" {
 ; CHECK-NEXT:    [[LOC:%.*]] = alloca i32, align 4, addrspace(5)
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @PG1, align 4
 ; CHECK-NEXT:    store i32 [[A]], ptr addrspace(5) [[LOC]], align 4
-; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4
-; CHECK-NEXT:    [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr
-; CHECK-NEXT:    store i32 [[B]], ptr [[ARGC]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(5) @PG2, align 4
+; CHECK-NEXT:    store i32 [[B]], ptr addrspace(5) [[ARG]], align 4
 ; CHECK-NEXT:    [[V:%.*]] = load i32, ptr addrspace(5) [[LOC]], align 4
 ; CHECK-NEXT:    store i32 [[V]], ptr @PG1, align 4
 ; CHECK-NEXT:    ret void
@@ -370,7 +368,7 @@ define void @neg_mem() "kernel" {
 ; CHECK-NEXT:    store i32 [[A]], ptr [[ARG]], align 4
 ; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    call void @aligned_barrier()
-; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(1) @G2, align 4
 ; CHECK-NEXT:    store i32 [[B]], ptr @G1, align 4
 ; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll b/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll
index 7a5bba75f6c48c..d81f34f3c42737 100644
--- a/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll
+++ b/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll
@@ -22,7 +22,7 @@ define internal void @func() {
 ; CHECK-LABEL: define {{[^@]+}}@func
 ; CHECK-SAME: () #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I:%.*]] = load ptr, ptr null, align 4294967296
+; CHECK-NEXT:    [[I:%.*]] = load ptr, ptr addrspace(5) null, align 4294967296
 ; CHECK-NEXT:    store i64 0, ptr [[I]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 4f4a87cbddfec1..5c4386b24a3d5a 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -60,7 +60,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn,
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
 ; CHECK:       region.guarded.i:
 ; CHECK-NEXT:    [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
-; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16
+; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16
 ; CHECK-NEXT:    br label [[_Z3FOOI_INTERNALIZED_EXIT]]
 ; CHECK:       _Z3fooi.internalized.exit:
 ; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
@@ -140,7 +140,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn,
 ; CHECK-NEXT:    [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
+; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
 ; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
 ; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
index ce7b4f89b893ff..52be16c41f872d 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll
@@ -27,21 +27,22 @@ define i32 @fputs() {
 define internal i32 @__kmpc_target_init(ptr %0, ptr %dyn) {
 ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
 ; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[DYN:%.*]]) #[[ATTR1:[0-9]+]] {
-; AMDGPU-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2
-; AMDGPU-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 2
-; AMDGPU-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-; AMDGPU-NEXT:    [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
-; AMDGPU-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false
-; AMDGPU-NEXT:    br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]]
-; AMDGPU:       7:
-; AMDGPU-NEXT:    store i8 0, ptr addrspace(3) null, align 2147483648
-; AMDGPU-NEXT:    br label [[TMP8]]
+; AMDGPU-NEXT:    [[TMP2:%.*]] = addrspacecast ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2) to ptr addrspace(1)
+; AMDGPU-NEXT:    [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 2
+; AMDGPU-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 2
+; AMDGPU-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
+; AMDGPU-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
+; AMDGPU-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false
+; AMDGPU-NEXT:    br i1 [[OR_COND]], label [[TMP8:%.*]], label [[TMP9:%.*]]
 ; AMDGPU:       8:
-; AMDGPU-NEXT:    br label [[TMP10:%.*]]
+; AMDGPU-NEXT:    store i8 0, ptr addrspace(3) null, align 2147483648
+; AMDGPU-NEXT:    br label [[TMP9]]
 ; AMDGPU:       9:
-; AMDGPU-NEXT:    unreachable
+; AMDGPU-NEXT:    br label [[TMP11:%.*]]
 ; AMDGPU:       10:
+; AMDGPU-NEXT:    unreachable
+; AMDGPU:       11:
 ; AMDGPU-NEXT:    ret i32 0
 ;
   %2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2



More information about the llvm-commits mailing list