[llvm] InstCombine: Process addrspacecast uses in PointerReplacer (PR #91953)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon May 13 06:08:34 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/91953

>From c03105c85ec93b0be0947134f175b6c5d0d56fb4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 9 May 2024 23:23:25 +0200
Subject: [PATCH 1/2] InstCombine: Add baseline test for issue #68120

---
 .../InstCombine/AMDGPU/issue68120.ll          | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll b/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll
new file mode 100644
index 0000000000000..176aa2ebe6c19
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
+
+; It is illegal to pass ptr addrspace(4) in %arg by addrspacecasting
+; to ptr addrspace(5) to pass off to the stack argument. A temporary
+; alloca and memcpy is necessary.
+define void @issue68120_invalid_addrspacecast_introduced_0(ptr addrspace(4) byref([56 x i8]) %arg) {
+; CHECK-LABEL: define void @issue68120_invalid_addrspacecast_introduced_0(
+; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr addrspace(5)
+; CHECK-NEXT:    call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [56 x i8], addrspace(5)
+  %alloca1 = alloca [56 x i8], addrspace(5)
+  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 56, i1 false)
+  call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca, i64 56, i1 false)
+  %addrspacecast.alloca1 = addrspacecast ptr addrspace(5) %alloca1 to ptr
+  %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca1 to ptr addrspace(5)
+  call void @byval_func(ptr addrspace(5) %addrspacecast.0.to.5)
+  ret void
+}
+
+; Further reduced variant that already eliminated one of the copies
+define void @issue68120_invalid_addrspacecast_introduced_1(ptr addrspace(4) byref([56 x i8]) %arg) {
+; CHECK-LABEL: define void @issue68120_invalid_addrspacecast_introduced_1(
+; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr addrspace(5)
+; CHECK-NEXT:    call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [56 x i8], addrspace(5)
+  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 56, i1 false)
+  %addrspacecast.alloca = addrspacecast ptr addrspace(5) %alloca to ptr
+  %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca to ptr addrspace(5)
+  call void @byval_func(ptr addrspace(5) %addrspacecast.0.to.5)
+  ret void
+}
+
+define void @issue68120_use_cast_to_as0(ptr addrspace(4) byref([56 x i8]) %arg) {
+; CHECK-LABEL: define void @issue68120_use_cast_to_as0(
+; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [56 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ALLOCA]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false)
+; CHECK-NEXT:    [[ADDRSPACECAST_ALLOCA:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    call void @other_func(ptr [[ADDRSPACECAST_ALLOCA]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [56 x i8], addrspace(5)
+  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 56, i1 false)
+  %addrspacecast.alloca = addrspacecast ptr addrspace(5) %alloca to ptr
+  %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca to ptr addrspace(5)
+  call void @other_func(ptr %addrspacecast.alloca)
+  ret void
+}
+
+define void @issue68120_uses_invalid_addrspacecast(ptr addrspace(4) byref([56 x i8]) %arg) {
+; CHECK-LABEL: define void @issue68120_uses_invalid_addrspacecast(
+; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr addrspace(5)
+; CHECK-NEXT:    call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [56 x i8], addrspace(5)
+  %alloca1 = alloca [56 x i8], addrspace(5)
+  %illegal.cast = addrspacecast ptr addrspace(4) %arg to ptr addrspace(5)
+  call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) %alloca, ptr addrspace(5) %illegal.cast, i64 56, i1 false)
+  call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca, i64 56, i1 false)
+  %addrspacecast.alloca1 = addrspacecast ptr addrspace(5) %alloca1 to ptr
+  %addrspacecast.0.to.5 = addrspacecast ptr %addrspacecast.alloca1 to ptr addrspace(5)
+  call void @byval_func(ptr addrspace(5) %addrspacecast.0.to.5)
+  ret void
+}
+
+
+declare void @byval_func(ptr addrspace(5) byval([56 x i8]))
+declare void @other_func(ptr)
+

>From 1a27cee12ccaed22b94128614107881a8646e57b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 10 May 2024 13:51:03 +0200
Subject: [PATCH 2/2] InstCombine: Process addrspacecast uses in
 PointerReplacer

This was looking through an addrspacecast, and not finding a later
unfoldable cast to another address space. Fixes improperly deleting
a required alloca + memcpy and introducing an illegal addrspacecast.

This also required fixing some worklist management issues with
addrspacecast, and assuming that only memcpy sources could need
replacement.

Regresses one test function, but this looks like it optimized
before by accident. It never saw the pointer use by the call
to readonly_callee, which should require insertion of a new cast.

Fixes #68120
---
 .../InstCombineLoadStoreAlloca.cpp            | 33 ++++++++++---------
 .../InstCombine/AMDGPU/issue68120.ll          |  9 +++--
 .../InstCombine/ptr-replace-alloca.ll         |  6 +++-
 3 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 537890d9025fe..4351a55ea1d30 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -342,9 +342,13 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) {
       Worklist.insert(Inst);
     } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
       Worklist.insert(Inst);
+      if (!collectUsersRecursive(*Inst))
+        return false;
     } else if (Inst->isLifetimeStartOrEnd()) {
       continue;
     } else {
+      // TODO: For arbitrary uses with address space mismatches, should we check
+      // if we can introduce a valid addrspacecast?
       LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
       return false;
     }
@@ -406,20 +410,18 @@ void PointerReplacer::replace(Instruction *I) {
     NewSI->takeName(SI);
     WorkMap[SI] = NewSI;
   } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
-    auto *SrcV = getReplacement(MemCpy->getRawSource());
-    // The pointer may appear in the destination of a copy, but we don't want to
-    // replace it.
-    if (!SrcV) {
-      assert(getReplacement(MemCpy->getRawDest()) &&
-             "destination not in replace list");
-      return;
-    }
+    auto *DestV = MemCpy->getRawDest();
+    auto *SrcV = MemCpy->getRawSource();
+
+    if (auto *DestReplace = getReplacement(DestV))
+      DestV = DestReplace;
+    if (auto *SrcReplace = getReplacement(SrcV))
+      SrcV = SrcReplace;
 
     IC.Builder.SetInsertPoint(MemCpy);
     auto *NewI = IC.Builder.CreateMemTransferInst(
-        MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
-        SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
-        MemCpy->isVolatile());
+        MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
+        MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
     AAMDNodes AAMD = MemCpy->getAAMetadata();
     if (AAMD)
       NewI->setAAMetadata(AAMD);
@@ -432,16 +434,17 @@ void PointerReplacer::replace(Instruction *I) {
     assert(isEqualOrValidAddrSpaceCast(
                ASC, V->getType()->getPointerAddressSpace()) &&
            "Invalid address space cast!");
-    auto *NewV = V;
+
     if (V->getType()->getPointerAddressSpace() !=
         ASC->getType()->getPointerAddressSpace()) {
       auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
       NewI->takeName(ASC);
       IC.InsertNewInstWith(NewI, ASC->getIterator());
-      NewV = NewI;
+      WorkMap[ASC] = NewI;
+    } else {
+      WorkMap[ASC] = V;
     }
-    IC.replaceInstUsesWith(*ASC, NewV);
-    IC.eraseInstFromFunction(*ASC);
+
   } else {
     llvm_unreachable("should never reach here");
   }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll b/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll
index 176aa2ebe6c19..346c64f096ba3 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/issue68120.ll
@@ -7,7 +7,8 @@
 define void @issue68120_invalid_addrspacecast_introduced_0(ptr addrspace(4) byref([56 x i8]) %arg) {
 ; CHECK-LABEL: define void @issue68120_invalid_addrspacecast_introduced_0(
 ; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr addrspace(5)
+; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = alloca [56 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ADDRSPACECAST_0_TO_5]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false)
 ; CHECK-NEXT:    call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]])
 ; CHECK-NEXT:    ret void
 ;
@@ -25,7 +26,8 @@ define void @issue68120_invalid_addrspacecast_introduced_0(ptr addrspace(4) byre
 define void @issue68120_invalid_addrspacecast_introduced_1(ptr addrspace(4) byref([56 x i8]) %arg) {
 ; CHECK-LABEL: define void @issue68120_invalid_addrspacecast_introduced_1(
 ; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr addrspace(5)
+; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = alloca [56 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ADDRSPACECAST_0_TO_5]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false)
 ; CHECK-NEXT:    call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]])
 ; CHECK-NEXT:    ret void
 ;
@@ -57,7 +59,8 @@ define void @issue68120_use_cast_to_as0(ptr addrspace(4) byref([56 x i8]) %arg)
 define void @issue68120_uses_invalid_addrspacecast(ptr addrspace(4) byref([56 x i8]) %arg) {
 ; CHECK-LABEL: define void @issue68120_uses_invalid_addrspacecast(
 ; CHECK-SAME: ptr addrspace(4) byref([56 x i8]) [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr addrspace(5)
+; CHECK-NEXT:    [[ADDRSPACECAST_0_TO_5:%.*]] = alloca [56 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 1 dereferenceable(56) [[ADDRSPACECAST_0_TO_5]], ptr addrspace(4) noundef align 1 dereferenceable(56) [[ARG]], i64 56, i1 false)
 ; CHECK-NEXT:    call void @byval_func(ptr addrspace(5) [[ADDRSPACECAST_0_TO_5]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
index a7aa3a8ef1beb..c783b101251d5 100644
--- a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
@@ -429,9 +429,13 @@ entry:
 
 declare i8 @readonly_callee(ptr readonly nocapture)
 
+; FIXME: This should be able to fold to call i8 @readonly_callee(ptr nonnull @g1)
 define i8 @call_readonly_remove_alloca() {
 ; CHECK-LABEL: @call_readonly_remove_alloca(
-; CHECK-NEXT:    [[V:%.*]] = call i8 @readonly_callee(ptr nonnull @g1)
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 1, addrspace(1)
+; CHECK-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 1 dereferenceable(32) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
+; CHECK-NEXT:    [[P:%.*]] = addrspacecast ptr addrspace(1) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[V:%.*]] = call i8 @readonly_callee(ptr [[P]])
 ; CHECK-NEXT:    ret i8 [[V]]
 ;
   %alloca = alloca [32 x i8], addrspace(1)



More information about the llvm-commits mailing list