[llvm] [AMDGPU] fold a call to implicitarg.ptr to poison with no-implicitarg-ptr (PR #186925)

Yoonseo Choi via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 16 17:07:57 PDT 2026


https://github.com/yoonseoch created https://github.com/llvm/llvm-project/pull/186925

None

>From 17299cf5d9a494585b4fd7d4364b0939e2f961d3 Mon Sep 17 00:00:00 2001
From: Yoonseo Choi <yoonchoi at amd.com>
Date: Mon, 16 Mar 2026 19:04:41 -0500
Subject: [PATCH] fold a call to implicitarg.ptr to a poison value when
 amdgpu-no-implicitarg-ptr is set

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  2 +
 .../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll     | 41 +++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 37dcc34e2e61c..4f866087a1309 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -723,6 +723,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   Intrinsic::ID IID = II.getIntrinsicID();
   switch (IID) {
   case Intrinsic::amdgcn_implicitarg_ptr: {
+    if (II.getFunction()->hasFnAttribute("amdgpu-no-implicitarg-ptr"))
+      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
     uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*II.getFunction());
 
     uint64_t CurrentOrNullBytes =
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 43e33f7170c50..048222d0bb22b 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -68,10 +68,51 @@ entry:
   ret ptr addrspace(4) %tmp
 }
 
+; When amdgpu-no-implicitarg-ptr is set, calling the intrinsic is UB and the
+; call should be folded to poison.
+define ptr addrspace(4) @no_implicitarg_ptr() #2 {
+; AMDHSA-LABEL: define ptr addrspace(4) @no_implicitarg_ptr(
+; AMDHSA-SAME: ) #[[ATTR0:[0-9]+]] {
+; AMDHSA-NEXT:  [[ENTRY:.*:]]
+; AMDHSA-NEXT:    ret ptr addrspace(4) poison
+;
+; MESA-LABEL: define ptr addrspace(4) @no_implicitarg_ptr(
+; MESA-SAME: ) #[[ATTR0:[0-9]+]] {
+; MESA-NEXT:  [[ENTRY:.*:]]
+; MESA-NEXT:    ret ptr addrspace(4) poison
+;
+entry:
+  %tmp = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  ret ptr addrspace(4) %tmp
+}
+
+; When amdgpu-no-implicitarg-ptr is set, a load through the intrinsic's result
+; is also UB. The call folds to poison; InstCombine then replaces the load with
+; poison and leaves a store to a poison pointer marking the point unreachable.
+define i32 @no_implicitarg_ptr_load() #2 {
+; AMDHSA-LABEL: define i32 @no_implicitarg_ptr_load(
+; AMDHSA-SAME: ) #[[ATTR0]] {
+; AMDHSA-NEXT:  [[ENTRY:.*:]]
+; AMDHSA-NEXT:    store i1 true, ptr poison, align 1
+; AMDHSA-NEXT:    ret i32 poison
+;
+; MESA-LABEL: define i32 @no_implicitarg_ptr_load(
+; MESA-SAME: ) #[[ATTR0]] {
+; MESA-NEXT:  [[ENTRY:.*:]]
+; MESA-NEXT:    store i1 true, ptr poison, align 1
+; MESA-NEXT:    ret i32 poison
+;
+entry:
+  %ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  %val = load i32, ptr addrspace(4) %ptr
+  ret i32 %val
+}
+
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { "amdgpu-no-implicitarg-ptr" }
 
 !llvm.module.flags = !{!0}
 



More information about the llvm-commits mailing list