[llvm] [Attributor] Don't replace `addrspacecast (ptr null to ptr addrspace(x))` with `ptr addrspace(x) null` (PR #126779)

Tue May 20 09:03:33 PDT 2025

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/126779

>From 2bbd606f980fb9f0d5fa8a0fed371c637b8b5507 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 20 May 2025 12:03:16 -0400
Subject: [PATCH] [Attributor] Don't replace `AddrSpaceCast` with
 `ConstantPointerNull`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`ConstantPointerNull` represents a pointer whose value is 0, but a zero value
is not necessarily the null pointer of a given address space. `ptr addrspace(x)
null` is not the same as `addrspacecast (ptr null to ptr addrspace(x))` if the
null pointer in AS x is not zero. Therefore, we can't simply replace the
`addrspacecast` expression with `ptr addrspace(x) null`.

Fixes #115083.
---
 llvm/lib/Transforms/IPO/Attributor.cpp        |  2 +-
 llvm/test/CodeGen/AMDGPU/addrspacecast.ll     |  4 +-
 ...-addrspacecast-with-constantpointernull.ll | 71 +++++++++++++++++++
 .../Attributor/AMDGPU/lit.local.cfg           |  2 +
 4 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll
 create mode 100644 llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index e432f0cb7d897..47f3fd20148f7 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -325,7 +325,7 @@ Value *AA::getWithType(Value &V, Type &Ty) {
   if (isa<UndefValue>(V))
     return UndefValue::get(&Ty);
   if (auto *C = dyn_cast<Constant>(&V)) {
-    if (C->isNullValue())
+    if (C->isNullValue() && !Ty.isPtrOrPtrVectorTy())
       return Constant::getNullValue(&Ty);
     if (C->getType()->isPointerTy() && Ty.isPointerTy())
       return ConstantExpr::getPointerCast(C, &Ty);
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index f176f34f84736..a6a0a9a3c9015 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -217,7 +217,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
 ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
 
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s{{[0-9]+}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
 ; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
 define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
@@ -260,7 +260,7 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
 ; FIXME: Shouldn't need to enable queue ptr
 ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s{{[0-9]+}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
 ; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
 define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll b/llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll
new file mode 100644
index 0000000000000..0d0f4af286ce5
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/AMDGPU/do-not-replace-addrspacecast-with-constantpointernull.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=attributor %s -o - | FileCheck %s
+
+define i32 @addrspacecast_ptr(ptr %p0, ptr addrspace(5) %p5) {
+; CHECK-LABEL: define i32 @addrspacecast_ptr(
+; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr addrspace(5) nofree readonly [[P5:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq ptr addrspace(5) [[P5]], addrspacecast (ptr null to ptr addrspace(5))
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %icmp = icmp eq ptr addrspace(5) %p5, addrspacecast (ptr null to ptr addrspace(5))
+  %select = select i1 %icmp, ptr %p0, ptr null
+  %load = load i32, ptr %select, align 4
+  ret i32 %load
+}
+
+define i32 @vec_addrspacecast_ptr(ptr %p0, ptr %p1, <2 x ptr addrspace(5)> %ptrvec) {
+; CHECK-LABEL: define i32 @vec_addrspacecast_ptr(
+; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(16) [[P1:%.*]], <2 x ptr addrspace(5)> [[PTRVEC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x ptr addrspace(5)>, ptr [[P1]], align 16
+; CHECK-NEXT:    [[ICMPVEC:%.*]] = icmp eq <2 x ptr addrspace(5)> [[LOADVEC]], <ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5))>
+; CHECK-NEXT:    [[ICMP:%.*]] = extractelement <2 x i1> [[ICMPVEC]], i32 1
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %loadvec = load <2 x ptr addrspace(5)>, ptr %p1, align 16
+  %icmpvec = icmp eq <2 x ptr addrspace(5)> %loadvec, <ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5))>
+  %icmp = extractelement <2 x i1> %icmpvec, i32 1
+  %select = select i1 %icmp, ptr %p0, ptr null
+  %load = load i32, ptr %select, align 4
+  ret i32 %load
+}
+
+define i32 @addrspacecast_vec_as1_ptr(ptr %p0, ptr %p1, <2 x ptr addrspace(5)> %ptrvec) {
+; CHECK-LABEL: define i32 @addrspacecast_vec_as1_ptr(
+; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(16) [[P1:%.*]], <2 x ptr addrspace(5)> [[PTRVEC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x ptr addrspace(5)>, ptr [[P1]], align 16
+; CHECK-NEXT:    [[ICMPVEC:%.*]] = icmp eq <2 x ptr addrspace(5)> [[LOADVEC]], <ptr addrspace(5) addrspacecast (ptr addrspace(1) null to ptr addrspace(5)), ptr addrspace(5) addrspacecast (ptr addrspace(1) null to ptr addrspace(5))>
+; CHECK-NEXT:    [[ICMP:%.*]] = extractelement <2 x i1> [[ICMPVEC]], i32 1
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %loadvec = load <2 x ptr addrspace(5)>, ptr %p1, align 16
+  %icmpvec = icmp eq <2 x ptr addrspace(5)> %loadvec, addrspacecast (<2 x ptr addrspace(1)> zeroinitializer to <2 x ptr addrspace(5)>)
+  %icmp = extractelement <2 x i1> %icmpvec, i32 1
+  %select = select i1 %icmp, ptr %p0, ptr null
+  %load = load i32, ptr %select, align 4
+  ret i32 %load
+}
+
+define i32 @addrspacecast_vec_ptr(ptr %p0, ptr %p1, <2 x ptr addrspace(5)> %ptrvec) {
+; CHECK-LABEL: define i32 @addrspacecast_vec_ptr(
+; CHECK-SAME: ptr nofree readonly captures(none) [[P0:%.*]], ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(16) [[P1:%.*]], <2 x ptr addrspace(5)> [[PTRVEC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x ptr addrspace(5)>, ptr [[P1]], align 16
+; CHECK-NEXT:    [[ICMPVEC:%.*]] = icmp eq <2 x ptr addrspace(5)> [[LOADVEC]], <ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5))>
+; CHECK-NEXT:    [[ICMP:%.*]] = extractelement <2 x i1> [[ICMPVEC]], i32 1
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], ptr [[P0]], ptr null
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %loadvec = load <2 x ptr addrspace(5)>, ptr %p1, align 16
+  %icmpvec = icmp eq <2 x ptr addrspace(5)> %loadvec, addrspacecast (<2 x ptr> zeroinitializer to <2 x ptr addrspace(5)>)
+  %icmp = extractelement <2 x i1> %icmpvec, i32 1
+  %select = select i1 %icmp, ptr %p0, ptr null
+  %load = load i32, ptr %select, align 4
+  ret i32 %load
+}
+
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg b/llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg
new file mode 100644
index 0000000000000..7c492428aec76
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AMDGPU" in config.root.targets:
+    config.unsupported = True