[llvm] [AMDGPU] Add !noalias metadata to mem-accessing calls w/o pointer args (PR #188949)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 27 03:22:42 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: michaelselehov

<details>
<summary>Changes</summary>

addAliasScopeMetadata in AMDGPULowerKernelArguments skips instructions with empty PtrArgs, including memory-accessing calls that have no pointer arguments (e.g. builtins like threadIdx()). Because these calls never receive !noalias metadata, ScopedNoAliasAA cannot prove they don't alias noalias kernel arguments. MemorySSA then conservatively reports them as clobbers, which prevents AMDGPUAnnotateUniformValues from marking loads as noclobber, blocking scalarization (s_load) and forcing expensive vector loads (global_load) instead.

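Fix by adding all noalias kernel argument scopes to !noalias metadata for memory-accessing instructions with no pointer arguments. Since such instructions have no pointer operands, they cannot access memory through a kernel pointer argument directly, so all noalias scopes are treated as safe to apply. (Note: this reasoning assumes that no noalias argument has been captured, e.g. stored to a global, before the instruction executes; unlike the existing pointer-argument path, the new path does not perform a PointerMayBeCapturedBefore check.)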

This fixes a performance regression in rocFFT introduced by bd9668df0f00 ("[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments").

Assisted-by: Claude Opus

---
Full diff: https://github.com/llvm/llvm-project/pull/188949.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp (+47-39) 
- (added) llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll (+43) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index f93b0b44f2488..344290c7aab22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -113,53 +113,68 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
 
         PtrArgs.push_back(Arg);
       }
-    }
-
-    if (PtrArgs.empty())
+    } else {
       continue;
+    }
 
     // Collect underlying objects of pointer arguments.
     SmallVector<Metadata *, 4u> Scopes;
     SmallPtrSet<const Value *, 4u> ObjSet;
     SmallVector<Metadata *, 4u> NoAliases;
 
-    for (const Value *Val : PtrArgs) {
-      SmallVector<const Value *, 4u> Objects;
-      getUnderlyingObjects(Val, Objects);
-      ObjSet.insert_range(Objects);
-    }
+    if (!PtrArgs.empty()) {
+      for (const Value *Val : PtrArgs) {
+        SmallVector<const Value *, 4u> Objects;
+        getUnderlyingObjects(Val, Objects);
+        ObjSet.insert_range(Objects);
+      }
 
-    bool RequiresNoCaptureBefore = false;
-    bool UsesUnknownObject = false;
-    bool UsesAliasingPtr = false;
+      bool RequiresNoCaptureBefore = false;
+      bool UsesUnknownObject = false;
+      bool UsesAliasingPtr = false;
 
-    for (const Value *Val : ObjSet) {
-      if (isa<ConstantData>(Val))
-        continue;
+      for (const Value *Val : ObjSet) {
+        if (isa<ConstantData>(Val))
+          continue;
 
-      if (const Argument *Arg = dyn_cast<Argument>(Val)) {
-        if (!Arg->hasAttribute(Attribute::NoAlias))
+        if (const Argument *Arg = dyn_cast<Argument>(Val)) {
+          if (!Arg->hasAttribute(Attribute::NoAlias))
+            UsesAliasingPtr = true;
+        } else
           UsesAliasingPtr = true;
-      } else
-        UsesAliasingPtr = true;
 
-      if (isEscapeSource(Val))
-        RequiresNoCaptureBefore = true;
-      else if (!isa<Argument>(Val) && isIdentifiedObject(Val))
-        UsesUnknownObject = true;
-    }
-
-    if (UsesUnknownObject)
-      continue;
+        if (isEscapeSource(Val))
+          RequiresNoCaptureBefore = true;
+        else if (!isa<Argument>(Val) && isIdentifiedObject(Val))
+          UsesUnknownObject = true;
+      }
 
-    // Collect noalias scopes for instruction.
-    for (const Argument *Arg : NoAliasArgs) {
-      if (ObjSet.contains(Arg))
+      if (UsesUnknownObject)
         continue;
 
-      if (!RequiresNoCaptureBefore ||
-          !capturesAnything(PointerMayBeCapturedBefore(
-              Arg, false, I, &DT, false, CaptureComponents::Provenance)))
+      // Collect noalias scopes for instruction.
+      for (const Argument *Arg : NoAliasArgs) {
+        if (ObjSet.contains(Arg))
+          continue;
+
+        if (!RequiresNoCaptureBefore ||
+            !capturesAnything(PointerMayBeCapturedBefore(
+                Arg, false, I, &DT, false, CaptureComponents::Provenance)))
+          NoAliases.push_back(NewScopes[Arg]);
+      }
+
+      // Collect scopes for alias.scope metadata.
+      if (!UsesAliasingPtr)
+        for (const Argument *Arg : NoAliasArgs) {
+          if (ObjSet.count(Arg))
+            Scopes.push_back(NewScopes[Arg]);
+        }
+    } else {
+      // The instruction accesses memory but has no pointer arguments (e.g. a
+      // call without pointer args like a threadIdx() builtin). It cannot
+      // access memory through any noalias kernel argument, so all noalias
+      // scopes apply.
+      for (const Argument *Arg : NoAliasArgs)
         NoAliases.push_back(NewScopes[Arg]);
     }
 
@@ -171,13 +186,6 @@ static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
       Inst->setMetadata(LLVMContext::MD_noalias, NewMD);
     }
 
-    // Collect scopes for alias.scope metadata.
-    if (!UsesAliasingPtr)
-      for (const Argument *Arg : NoAliasArgs) {
-        if (ObjSet.count(Arg))
-          Scopes.push_back(NewScopes[Arg]);
-      }
-
     // Add alias.scope metadata to instruction.
     if (!Scopes.empty()) {
       MDNode *NewMD =
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
new file mode 100644
index 0000000000000..bdceabf0f233e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-arguments-noalias-call-no-ptr-args.ll
@@ -0,0 +1,43 @@
+; RUN: opt -mtriple=amdgcn-- -S -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
+
+; Regression test for a bug where addAliasScopeMetadata skipped memory-
+; accessing calls with no pointer arguments (e.g. threadIdx()-like builtins),
+; leaving them without !noalias metadata. This caused AA to conservatively
+; report them as potential clobbers of noalias kernel arguments, blocking
+; downstream scalarization in AMDGPUAnnotateUniformValues and causing severe
+; performance regressions (e.g. in rocFFT).
+
+declare i32 @memory_read_no_ptr_args() #1
+
+; The call reads memory but has no pointer arguments — it cannot alias
+; any noalias kernel argument. The pass must add !noalias metadata to it.
+define amdgpu_kernel void @call_without_ptr_args(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+; CHECK-LABEL: @call_without_ptr_args(
+; CHECK: [[CALL:%.*]] = call i32 @memory_read_no_ptr_args(){{.*}} !noalias [[SCOPES:![0-9]+]]
+; CHECK: load i32, {{.*}} !alias.scope {{.*}} !noalias
+; CHECK: store i32 {{.*}} !alias.scope {{.*}} !noalias
+; CHECK: ret void
+  %val = call i32 @memory_read_no_ptr_args()
+  %gep = getelementptr i32, ptr addrspace(1) %in, i32 %val
+  %load = load i32, ptr addrspace(1) %gep, align 4
+  store i32 %load, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+; Same scenario but the call is readnone — should NOT get noalias metadata
+; because it doesn't access memory at all and is skipped by the pass.
+declare i32 @readnone_no_ptr_args() #2
+
+define amdgpu_kernel void @readnone_call_without_ptr_args(ptr addrspace(1) noalias %out) #0 {
+; CHECK-LABEL: @readnone_call_without_ptr_args(
+; CHECK: call i32 @readnone_no_ptr_args()
+; CHECK-NOT: !noalias
+; CHECK: store i32
+  %val = call i32 @readnone_no_ptr_args()
+  store i32 %val, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind memory(read) }
+attributes #2 = { nounwind memory(none) }

``````````

</details>


https://github.com/llvm/llvm-project/pull/188949


More information about the llvm-commits mailing list