[llvm] [InferAddressSpaces] Fix bad `addrspacecast` insertion for phinode (PR #163528)

Kerang Mao via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 8 06:11:50 PST 2025


https://github.com/Kerang-BR updated https://github.com/llvm/llvm-project/pull/163528

>From a06009296acad4f3a60376f56e89e26774bdd701 Mon Sep 17 00:00:00 2001
From: Kerang <krmao at birentech.com>
Date: Wed, 15 Oct 2025 16:53:39 +0800
Subject: [PATCH 1/2] [InferAddressSpaces] Fix bad `addrspacecast` insertion
 for phinode

  The IR verifier will crash if any non-PHI instruction is located
before a phi-node. In some corner cases the infer-address-spaces pass
tries to insert an addrspacecast before a phi-node. Since the operand
pointer (the phi-node's incoming value) has already been determined by
the pass to be in NewAS, it is safe to addrspacecast it immediately
after the position where it is defined.
---
 .../Transforms/Scalar/InferAddressSpaces.cpp  | 39 +++++++++++++
 .../NVPTX/phinode-address-infer.ll            | 57 +++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 3ad87545953ff..352a1b331001a 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -617,6 +617,41 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
   return Postorder;
 }
 
+// Inserts the addrspacecast NewI for the PHI incoming value Operand at a
+// position chosen from the kind of Operand, and returns NewI.
+static Value *phiNodeOperandWithNewAddressSpace(AddrSpaceCastInst *NewI,
+                                                Value *Operand) {
+  auto InsertBefore = [NewI](auto It) {
+    NewI->insertBefore(It);
+    NewI->setDebugLoc(It->getDebugLoc());
+    return NewI;
+  };
+
+  if (auto *Arg = dyn_cast<Argument>(Operand)) {
+    // For arguments, insert the cast before the entry block's first non-PHI.
+    // TODO: Consider inserting at a dominating block for better placement.
+    Function *F = Arg->getParent();
+    auto InsertI = F->getEntryBlock().getFirstNonPHIIt();
+    return InsertBefore(InsertI);
+  }
+
+  // Constants are not checked here; they are expected to be handled earlier.
+  assert(isa<Instruction>(Operand));
+
+  Instruction *OpInst = cast<Instruction>(Operand);
+  if (LLVM_UNLIKELY(OpInst->getOpcode() == Instruction::PHI)) {
+    // If the operand is defined by another PHI node, insert before the first
+    // non-PHI instruction of the PHI's own basic block.
+    auto InsertI = OpInst->getParent()->getFirstNonPHIIt();
+    return InsertBefore(InsertI);
+  }
+
+  // Otherwise, insert immediately after the operand definition.
+  NewI->insertAfter(OpInst->getIterator());
+  NewI->setDebugLoc(OpInst->getDebugLoc());
+  return NewI;
+}
+
 // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
 // of OperandUse.get() in the new address space. If the clone is not ready yet,
 // returns poison in the new address space as a placeholder.
@@ -642,6 +677,10 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
     unsigned NewAS = I->second;
     Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
     auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
+
+    if (LLVM_UNLIKELY(Inst->getOpcode() == Instruction::PHI))
+      return phiNodeOperandWithNewAddressSpace(NewI, Operand);
+
     NewI->insertBefore(Inst->getIterator());
     NewI->setDebugLoc(Inst->getDebugLoc());
     return NewI;
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
new file mode 100644
index 0000000000000..e5c52cfc0d269
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes='require<domtree>,infer-address-spaces' %s | FileCheck %s
+
+;;; Handle a corner case for infer-address-spaces with phi-nodes. The
+;;; verifier will crash if we insert an `addrspacecast` before a phi-node.
+
+target triple = "nvptx64-nvidia-cuda"
+
+declare void @llvm.assume(i1 noundef)
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+
+define ptr @phinode_instr() {
+; CHECK-LABEL: @phinode_instr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_1:%.*]] = load ptr, ptr null, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
+; CHECK-NEXT:    [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[PTR_1]])
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT:    br label [[IF_SINK_SPLIT:%.*]]
+; CHECK:       if.sink.split:
+; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[PTR_SINK]] to ptr
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+entry:
+  %ptr.1 = load ptr, ptr null, align 8
+  %bool.1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %ptr.1)
+  tail call void @llvm.assume(i1 %bool.1)
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %entry
+  %ptr.sink = phi ptr [ %ptr.1, %entry ]
+  ret ptr %ptr.sink
+}
+
+define ptr @phinode_argument(ptr %lhs_ptr) {
+; CHECK-LABEL: @phinode_argument(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[LHS_PTR:%.*]] to ptr addrspace(1)
+; CHECK-NEXT:    [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[LHS_PTR]])
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT:    br label [[IF_SINK_SPLIT:%.*]]
+; CHECK:       if.sink.split:
+; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr addrspace(1) [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR_SINK]] to ptr
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+entry:
+  %bool.1 = tail call i1 @llvm.nvvm.isspacep.global(ptr %lhs_ptr)
+  tail call void @llvm.assume(i1 %bool.1)
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %entry
+  %ptr.sink = phi ptr [ %lhs_ptr, %entry ]
+  ret ptr %ptr.sink
+}

>From 61ee0ea985a4b8a2441b2a7565bb674441f59271 Mon Sep 17 00:00:00 2001
From: Kerang <krmao at birentech.com>
Date: Sat, 8 Nov 2025 22:10:59 +0800
Subject: [PATCH 2/2] Revise the nonsense cases and add a new AMDGPU case.

---
 .../AMDGPU/phinode-address-infer.ll           | 55 +++++++++++++++++++
 .../NVPTX/phinode-address-infer.ll            | 22 ++++----
 2 files changed, 67 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll

diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
new file mode 100644
index 0000000000000..319c26a24b271
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -passes='require<domtree>,infer-address-spaces' %s | FileCheck %s
+
+define void @test(ptr %lhs_ptr, ptr %rhs_ptr) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[LHS_PTR:%.*]], ptr [[RHS_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR_1:%.*]] = load ptr, ptr [[LHS_PTR]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
+; CHECK-NEXT:    [[BOOL_1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_1]])
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT:    [[PTR_2:%.*]] = load ptr, ptr [[RHS_PTR]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[PTR_2]] to ptr addrspace(3)
+; CHECK-NEXT:    [[BOOL_2:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_2]])
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[BOOL_2]])
+; CHECK-NEXT:    br i1 poison, label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    br label %[[IF_SINK_SPLIT:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT:    br label %[[IF_SINK_SPLIT]]
+; CHECK:       [[IF_SINK_SPLIT]]:
+; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP1]], %[[IF_ELSE]] ]
+; CHECK-NEXT:    [[V_SINK:%.*]] = phi i32 [ [[V1]], %[[IF_THEN]] ], [ [[V2]], %[[IF_ELSE]] ]
+; CHECK-NEXT:    store i32 [[V_SINK]], ptr addrspace(3) [[PTR_SINK]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr.1 = load ptr, ptr %lhs_ptr, align 8
+  %bool.1 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.1)
+  tail call void @llvm.assume(i1 %bool.1)
+
+  %ptr.2 = load ptr, ptr %rhs_ptr, align 8
+  %bool.2 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.2)
+  tail call void @llvm.assume(i1 %bool.2)
+  br i1 poison, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %v1 = load i32, ptr null, align 4
+  br label %if.sink.split
+
+if.else:                                          ; preds = %entry
+  %v2 = load i32, ptr null, align 4
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %if.else, %if.then
+  %ptr.sink = phi ptr [ %ptr.1, %if.then ], [ %ptr.2, %if.else ]
+  %v.sink = phi i32 [ %v1, %if.then ], [ %v2, %if.else ]
+  store i32 %v.sink, ptr %ptr.sink, align 4
+  ret void
+}
+
+declare void @llvm.assume(i1 noundef)
+declare i1 @llvm.amdgcn.is.shared(ptr)
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
index e5c52cfc0d269..04a6281dc4bd8 100644
--- a/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
@@ -7,10 +7,10 @@
 target triple = "nvptx64-nvidia-cuda"
 
 declare void @llvm.assume(i1 noundef)
-declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
-declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr)
+declare i1 @llvm.nvvm.isspacep.global(ptr)
 
-define ptr @phinode_instr() {
+define void @phinode_instr() {
 ; CHECK-LABEL: @phinode_instr(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[PTR_1:%.*]] = load ptr, ptr null, align 8
@@ -20,8 +20,8 @@ define ptr @phinode_instr() {
 ; CHECK-NEXT:    br label [[IF_SINK_SPLIT:%.*]]
 ; CHECK:       if.sink.split:
 ; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[PTR_SINK]] to ptr
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; CHECK-NEXT:    store i32 1, ptr addrspace(3) [[PTR_SINK]], align 4
+; CHECK-NEXT:    ret void
 ;
 entry:
   %ptr.1 = load ptr, ptr null, align 8
@@ -31,10 +31,11 @@ entry:
 
 if.sink.split:                                    ; preds = %entry
   %ptr.sink = phi ptr [ %ptr.1, %entry ]
-  ret ptr %ptr.sink
+  store i32 1, ptr %ptr.sink, align 4
+  ret void
 }
 
-define ptr @phinode_argument(ptr %lhs_ptr) {
+define void @phinode_argument(ptr %lhs_ptr) {
 ; CHECK-LABEL: @phinode_argument(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[LHS_PTR:%.*]] to ptr addrspace(1)
@@ -43,8 +44,8 @@ define ptr @phinode_argument(ptr %lhs_ptr) {
 ; CHECK-NEXT:    br label [[IF_SINK_SPLIT:%.*]]
 ; CHECK:       if.sink.split:
 ; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr addrspace(1) [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR_SINK]] to ptr
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[PTR_SINK]], align 4
+; CHECK-NEXT:    ret void
 ;
 entry:
   %bool.1 = tail call i1 @llvm.nvvm.isspacep.global(ptr %lhs_ptr)
@@ -53,5 +54,6 @@ entry:
 
 if.sink.split:                                    ; preds = %entry
   %ptr.sink = phi ptr [ %lhs_ptr, %entry ]
-  ret ptr %ptr.sink
+  store i32 1, ptr %ptr.sink, align 4
+  ret void
 }



More information about the llvm-commits mailing list