[llvm] [AMDGPU] fold a call to implicitarg.ptr to poison with no-implicitarg-ptr (PR #186925)
Yoonseo Choi via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 17:07:57 PDT 2026
https://github.com/yoonseoch created https://github.com/llvm/llvm-project/pull/186925
None
>From 17299cf5d9a494585b4fd7d4364b0939e2f961d3 Mon Sep 17 00:00:00 2001
From: Yoonseo Choi <yoonchoi at amd.com>
Date: Mon, 16 Mar 2026 19:04:41 -0500
Subject: [PATCH] fold a call to implicitarg.ptr to a poison value when
amdgpu-no-implicitarg-ptr is set
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +
.../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 41 +++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 37dcc34e2e61c..4f866087a1309 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -723,6 +723,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
switch (IID) {
case Intrinsic::amdgcn_implicitarg_ptr: {
+ if (II.getFunction()->hasFnAttribute("amdgpu-no-implicitarg-ptr"))
+ return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*II.getFunction());
uint64_t CurrentOrNullBytes =
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 43e33f7170c50..048222d0bb22b 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -68,10 +68,51 @@ entry:
ret ptr addrspace(4) %tmp
}
+; When amdgpu-no-implicitarg-ptr is set, calling the intrinsic is UB and the
+; call should be folded to poison.
+define ptr addrspace(4) @no_implicitarg_ptr() #2 {
+; AMDHSA-LABEL: define ptr addrspace(4) @no_implicitarg_ptr(
+; AMDHSA-SAME: ) #[[ATTR0:[0-9]+]] {
+; AMDHSA-NEXT: [[ENTRY:.*:]]
+; AMDHSA-NEXT: ret ptr addrspace(4) poison
+;
+; MESA-LABEL: define ptr addrspace(4) @no_implicitarg_ptr(
+; MESA-SAME: ) #[[ATTR0:[0-9]+]] {
+; MESA-NEXT: [[ENTRY:.*:]]
+; MESA-NEXT: ret ptr addrspace(4) poison
+;
+entry:
+ %tmp = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ ret ptr addrspace(4) %tmp
+}
+
+; When amdgpu-no-implicitarg-ptr is set, a load from the intrinsic's returned
+; pointer is also UB. The call folds to poison; existing InstCombine rules then
+; replace the load with poison and leave a `store i1 true, ptr poison` marker.
+define i32 @no_implicitarg_ptr_load() #2 {
+; AMDHSA-LABEL: define i32 @no_implicitarg_ptr_load(
+; AMDHSA-SAME: ) #[[ATTR0]] {
+; AMDHSA-NEXT: [[ENTRY:.*:]]
+; AMDHSA-NEXT: store i1 true, ptr poison, align 1
+; AMDHSA-NEXT: ret i32 poison
+;
+; MESA-LABEL: define i32 @no_implicitarg_ptr_load(
+; MESA-SAME: ) #[[ATTR0]] {
+; MESA-NEXT: [[ENTRY:.*:]]
+; MESA-NEXT: store i1 true, ptr poison, align 1
+; MESA-NEXT: ret i32 poison
+;
+entry:
+ %ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %val = load i32, ptr addrspace(4) %ptr
+ ret i32 %val
+}
+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { "amdgpu-no-implicitarg-ptr" }
!llvm.module.flags = !{!0}
More information about the llvm-commits
mailing list