[llvm] [AMDGPU] Add !noalias metadata to mem-accessing calls w/o pointer args (PR #188949)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 07:39:32 PDT 2026
https://github.com/michaelselehov updated https://github.com/llvm/llvm-project/pull/188949
>From b9aae20d1eae7e593b1dd332d1431923f6cfcbb3 Mon Sep 17 00:00:00 2001
From: mselehov <mselehov at amd.com>
Date: Fri, 27 Mar 2026 04:47:16 -0500
Subject: [PATCH 1/4] [AMDGPU] Add !noalias metadata to memory-accessing calls
without pointer args
addAliasScopeMetadata in AMDGPULowerKernelArguments skips instructions
with empty PtrArgs, including memory-accessing calls that have no
pointer arguments (e.g. a call to a function declared memory(read)
that takes no arguments). Because these calls
never receive !noalias metadata, ScopedNoAliasAA cannot prove they
don't alias noalias kernel arguments. MemorySSA then conservatively
reports them as clobbers, which prevents AMDGPUAnnotateUniformValues
from marking loads as noclobber, blocking scalarization (s_load) and
forcing expensive vector loads (global_load) instead.
Fix by adding all noalias kernel argument scopes to !noalias metadata
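for memory-accessing instructions with no pointer arguments. Since such
instructions take no pointer operands, they cannot access memory through
a noalias kernel argument unless that pointer was captured earlier (e.g.
stored to a global); this change assumes no such capture and applies all
noalias scopes.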
This fixes a performance regression in rocFFT introduced by
bd9668df0f00 ("[AMDGPU] Propagate alias information in
AMDGPULowerKernelArguments").
Assisted-by: Claude Opus
---
.../AMDGPU/AMDGPULowerKernelArguments.cpp | 86 ++++++++++---------
...rnel-arguments-noalias-call-no-ptr-args.ll | 43 ++++++++++
2 files changed, 90 insertions(+), 39 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index f93b0b44f2488..344290c7aab22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -113,53 +113,68 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
PtrArgs.push_back(Arg);
}
- }
-
- if (PtrArgs.empty())
+ } else {
continue;
+ }
// Collect underlying objects of pointer arguments.
SmallVector<Metadata *, 4u> Scopes;
SmallPtrSet<const Value *, 4u> ObjSet;
SmallVector<Metadata *, 4u> NoAliases;
- for (const Value *Val : PtrArgs) {
- SmallVector<const Value *, 4u> Objects;
- getUnderlyingObjects(Val, Objects);
- ObjSet.insert_range(Objects);
- }
+ if (!PtrArgs.empty()) {
+ for (const Value *Val : PtrArgs) {
+ SmallVector<const Value *, 4u> Objects;
+ getUnderlyingObjects(Val, Objects);
+ ObjSet.insert_range(Objects);
+ }
- bool RequiresNoCaptureBefore = false;
- bool UsesUnknownObject = false;
- bool UsesAliasingPtr = false;
+ bool RequiresNoCaptureBefore = false;
+ bool UsesUnknownObject = false;
+ bool UsesAliasingPtr = false;
- for (const Value *Val : ObjSet) {
- if (isa<ConstantData>(Val))
- continue;
+ for (const Value *Val : ObjSet) {
+ if (isa<ConstantData>(Val))
+ continue;
- if (const Argument *Arg = dyn_cast<Argument>(Val)) {
- if (!Arg->hasAttribute(Attribute::NoAlias))
+ if (const Argument *Arg = dyn_cast<Argument>(Val)) {
+ if (!Arg->hasAttribute(Attribute::NoAlias))
+ UsesAliasingPtr = true;
+ } else
UsesAliasingPtr = true;
- } else
- UsesAliasingPtr = true;
- if (isEscapeSource(Val))
- RequiresNoCaptureBefore = true;
- else if (!isa<Argument>(Val) && isIdentifiedObject(Val))
- UsesUnknownObject = true;
- }
-
- if (UsesUnknownObject)
- continue;
+ if (isEscapeSource(Val))
+ RequiresNoCaptureBefore = true;
+ else if (!isa<Argument>(Val) && isIdentifiedObject(Val))
+ UsesUnknownObject = true;
+ }
- // Collect noalias scopes for instruction.
- for (const Argument *Arg : NoAliasArgs) {
- if (ObjSet.contains(Arg))
+ if (UsesUnknownObject)
continue;
- if (!RequiresNoCaptureBefore ||
- !capturesAnything(PointerMayBeCapturedBefore(
- Arg, false, I, &DT, false, CaptureComponents::Provenance)))
+ // Collect noalias scopes for instruction.
+ for (const Argument *Arg : NoAliasArgs) {
+ if (ObjSet.contains(Arg))
+ continue;
+
+ if (!RequiresNoCaptureBefore ||
+ !capturesAnything(PointerMayBeCapturedBefore(
+ Arg, false, I, &DT, false, CaptureComponents::Provenance)))
+ NoAliases.push_back(NewScopes[Arg]);
+ }
+
+ // Collect scopes for alias.scope metadata.
+ if (!UsesAliasingPtr)
+ for (const Argument *Arg : NoAliasArgs) {
+ if (ObjSet.count(Arg))
+ Scopes.push_back(NewScopes[Arg]);
+ }
+ } else {
+ // The instruction accesses memory but has no pointer arguments (e.g. a
+ // call without pointer args like a threadIdx() builtin). It cannot
+ // access memory through any noalias kernel argument, so all noalias
+ // scopes apply.
+ for (const Argument *Arg : NoAliasArgs)
NoAliases.push_back(NewScopes[Arg]);
}
@@ -171,13 +186,6 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
Inst->setMetadata(LLVMContext::MD_noalias, NewMD);
}
- // Collect scopes for alias.scope metadata.
- if (!UsesAliasingPtr)
- for (const Argument *Arg : NoAliasArgs) {
- if (ObjSet.count(Arg))
- Scopes.push_back(NewScopes[Arg]);
- }
-
// Add alias.scope metadata to instruction.
if (!Scopes.empty()) {
MDNode *NewMD =
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
new file mode 100644
index 0000000000000..bdceabf0f233e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
@@ -0,0 +1,43 @@
+; RUN: opt -mtriple=amdgcn-- -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
+
+; Regression test for a bug where addAliasScopeMetadata skipped memory-
+; accessing calls with no pointer arguments (e.g. threadIdx()-like builtins),
+; leaving them without !noalias metadata. This caused AA to conservatively
+; report them as potential clobbers of noalias kernel arguments, blocking
+; downstream scalarization in AMDGPUAnnotateUniformValues and causing severe
+; performance regressions (e.g. in rocFFT).
+
+declare i32 @memory_read_no_ptr_args() #1
+
+; The call reads memory but has no pointer arguments — it cannot alias
+; any noalias kernel argument. The pass must add !noalias metadata to it.
+define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+; CHECK-LABEL: @call_without_ptr_args(
+; CHECK: [[CALL:%.*]] = call i32 @memory_read_no_ptr_args(){{.*}} !noalias [[SCOPES:![0-9]+]]
+; CHECK: load i32, {{.*}} !alias.scope {{.*}} !noalias
+; CHECK: store i32 {{.*}} !alias.scope {{.*}} !noalias
+; CHECK: ret void
+ %val = call i32 @memory_read_no_ptr_args()
+ %gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
+ %load = load i32, ptr addrspace(1) %gep, align 4
+ store i32 %load, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; Same scenario but the call is readnone — should NOT get noalias metadata
+; because it doesn't access memory at all and is skipped by the pass.
+declare i32 @readnone_no_ptr_args() #2
+
+define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 {
+; CHECK-LABEL: @readnone_call_without_ptr_args(
+; CHECK: call i32 @readnone_no_ptr_args()
+; CHECK-NOT: !noalias
+; CHECK: store i32
+ %val = call i32 @readnone_no_ptr_args()
+ store i32 %val, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind memory(read) }
+attributes #2 = { nounwind memory(none) }
>From 309725162e74269e61e1d24dc51a9296f944e423 Mon Sep 17 00:00:00 2001
From: mselehov <mselehov at amd.com>
Date: Fri, 27 Mar 2026 09:21:05 -0500
Subject: [PATCH 2/4] [NFC] Add comments to the no-pointer-args handling in
addAliasScopeMetadata
---
.../Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 344290c7aab22..68c8331130aa0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -114,6 +114,7 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
PtrArgs.push_back(Arg);
}
} else {
+ // Not a memory access and not a call — nothing to annotate.
continue;
}
@@ -123,6 +124,8 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
SmallVector<Metadata *, 4u> NoAliases;
if (!PtrArgs.empty()) {
+ // Trace pointer arguments back to underlying objects and decide which
+ // noalias scopes apply based on provenance and capture analysis.
for (const Value *Val : PtrArgs) {
SmallVector<const Value *, 4u> Objects;
getUnderlyingObjects(Val, Objects);
@@ -170,10 +173,12 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
Scopes.push_back(NewScopes[Arg]);
}
} else {
- // The instruction accesses memory but has no pointer arguments (e.g. a
- // call without pointer args like a threadIdx() builtin). It cannot
- // access memory through any noalias kernel argument, so all noalias
- // scopes apply.
+ // The instruction accesses memory but has no pointer arguments
+ // (e.g. a HIP builtin like threadIdx()). Since none of its operands
+ // derive from any noalias kernel argument, it cannot possibly alias
+ // them. Mark it as !noalias w.r.t. every noalias scope so that
+ // ScopedNoAliasAA can prove non-aliasing when other instructions
+ // reference those scopes via !alias.scope.
for (const Argument *Arg : NoAliasArgs)
NoAliases.push_back(NewScopes[Arg]);
}
>From af81a1f0c5c8056671340cefaffb92320ff8ce72 Mon Sep 17 00:00:00 2001
From: mselehov <mselehov at amd.com>
Date: Mon, 30 Mar 2026 02:37:53 -0500
Subject: [PATCH 3/4] Address review comments: fix triple, remove threadIdx
references
- Use amdgcn-amd-amdhsa triple in test (shiltian nit)
- Remove misleading threadIdx() references from code comments and test
description (arsenm feedback)
- Adjust CHECK lines for full kernarg lowering with the new triple
---
.../AMDGPU/AMDGPULowerKernelArguments.cpp | 11 +++++------
...kernel-arguments-noalias-call-no-ptr-args.ll | 17 ++++++++---------
2 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 68c8331130aa0..a13011eca5a60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -173,12 +173,11 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
Scopes.push_back(NewScopes[Arg]);
}
} else {
- // The instruction accesses memory but has no pointer arguments
- // (e.g. a HIP builtin like threadIdx()). Since none of its operands
- // derive from any noalias kernel argument, it cannot possibly alias
- // them. Mark it as !noalias w.r.t. every noalias scope so that
- // ScopedNoAliasAA can prove non-aliasing when other instructions
- // reference those scopes via !alias.scope.
+ // The instruction accesses memory but has no pointer arguments.
+ // Since none of its operands derive from any noalias kernel argument,
+ // it cannot possibly alias them. Mark it as !noalias w.r.t. every
+ // noalias scope so that ScopedNoAliasAA can prove non-aliasing when
+ // other instructions reference those scopes via !alias.scope.
for (const Argument *Arg : NoAliasArgs)
NoAliases.push_back(NewScopes[Arg]);
}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
index bdceabf0f233e..49bc01043ca62 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
@@ -1,11 +1,11 @@
-; RUN: opt -mtriple=amdgcn-- -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
; Regression test for a bug where addAliasScopeMetadata skipped memory-
-; accessing calls with no pointer arguments (e.g. threadIdx()-like builtins),
-; leaving them without !noalias metadata. This caused AA to conservatively
-; report them as potential clobbers of noalias kernel arguments, blocking
-; downstream scalarization in AMDGPUAnnotateUniformValues and causing severe
-; performance regressions (e.g. in rocFFT).
+; accessing calls with no pointer arguments, leaving them without !noalias
+; metadata. This caused AA to conservatively report them as potential
+; clobbers of noalias kernel arguments, blocking downstream scalarization
+; in AMDGPUAnnotateUniformValues and causing severe performance regressions
+; (e.g. in rocFFT).
declare i32 @memory_read_no_ptr_args() #1
@@ -13,7 +13,7 @@ declare i32 @memory_read_no_ptr_args() #1
; any noalias kernel argument. The pass must add !noalias metadata to it.
define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
; CHECK-LABEL: @call_without_ptr_args(
-; CHECK: [[CALL:%.*]] = call i32 @memory_read_no_ptr_args(){{.*}} !noalias [[SCOPES:![0-9]+]]
+; CHECK: call i32 @memory_read_no_ptr_args(), !noalias [[SCOPES:![0-9]+]]
; CHECK: load i32, {{.*}} !alias.scope {{.*}} !noalias
; CHECK: store i32 {{.*}} !alias.scope {{.*}} !noalias
; CHECK: ret void
@@ -30,8 +30,7 @@ declare i32 @readnone_no_ptr_args() #2
define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 {
; CHECK-LABEL: @readnone_call_without_ptr_args(
-; CHECK: call i32 @readnone_no_ptr_args()
-; CHECK-NOT: !noalias
+; CHECK: {{call i32 @readnone_no_ptr_args\(\)$}}
; CHECK: store i32
%val = call i32 @readnone_no_ptr_args()
store i32 %val, ptr addrspace(1) %out, align 4
>From 45e2073c7b75a45bbd92457b305a258e9015d435 Mon Sep 17 00:00:00 2001
From: mselehov <mselehov at amd.com>
Date: Thu, 2 Apr 2026 02:17:04 -0500
Subject: [PATCH 4/4] Remove unused SCOPES FileCheck capture variable in test
---
...rnel-arguments-noalias-call-no-ptr-args.ll | 43 +++++++++++++++----
1 file changed, 35 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
index 49bc01043ca62..415dd43044773 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
; Regression test for a bug where addAliasScopeMetadata skipped memory-
@@ -12,11 +13,19 @@ declare i32 @memory_read_no_ptr_args() #1
; The call reads memory but has no pointer arguments — it cannot alias
; any noalias kernel argument. The pass must add !noalias metadata to it.
define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
-; CHECK-LABEL: @call_without_ptr_args(
-; CHECK: call i32 @memory_read_no_ptr_args(), !noalias [[SCOPES:![0-9]+]]
-; CHECK: load i32, {{.*}} !alias.scope {{.*}} !noalias
-; CHECK: store i32 {{.*}} !alias.scope {{.*}} !noalias
-; CHECK: ret void
+; CHECK-LABEL: define amdgpu_kernel void @call_without_ptr_args(
+; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
+; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 8
+; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 8, !invariant.load [[META0]]
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @memory_read_no_ptr_args(), !noalias [[META1:![0-9]+]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[VAL]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META6:![0-9]+]]
+; CHECK-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META6]], !noalias [[META5]]
+; CHECK-NEXT: ret void
+;
%val = call i32 @memory_read_no_ptr_args()
%gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
%load = load i32, ptr addrspace(1) %gep, align 4
@@ -29,9 +38,15 @@ define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out,
declare i32 @readnone_no_ptr_args() #2
define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 {
-; CHECK-LABEL: @readnone_call_without_ptr_args(
-; CHECK: {{call i32 @readnone_no_ptr_args\(\)$}}
-; CHECK: store i32
+; CHECK-LABEL: define amdgpu_kernel void @readnone_call_without_ptr_args(
+; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[READNONE_CALL_WITHOUT_PTR_ARGS_KERNARG_SEGMENT]], i64 0
+; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @readnone_no_ptr_args()
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META7:![0-9]+]]
+; CHECK-NEXT: ret void
+;
%val = call i32 @readnone_no_ptr_args()
store i32 %val, ptr addrspace(1) %out, align 4
ret void
@@ -40,3 +55,15 @@ define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noali
attributes #0 = { nounwind }
attributes #1 = { nounwind memory(read) }
attributes #2 = { nounwind memory(none) }
+;.
+; CHECK: [[META0]] = !{}
+; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]], !"out"}
+; CHECK: [[META3]] = distinct !{[[META3]], !"call_without_ptr_args"}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META3]], !"in"}
+; CHECK: [[META5]] = !{[[META4]]}
+; CHECK: [[META6]] = !{[[META2]]}
+; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]], !"out"}
+; CHECK: [[META9]] = distinct !{[[META9]], !"readnone_call_without_ptr_args"}
+;.
More information about the llvm-commits
mailing list