[llvm] [llvm][opt][Transforms] Preserve AMDGPU atomic metadata (PR #140314)

Alex Voicu via llvm-commits llvm-commits at lists.llvm.org
Fri May 23 13:41:43 PDT 2025


https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/140314

>From 31ceadae74d5df05d9baf501eaab27cc853bc806 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 16 May 2025 23:12:23 +0100
Subject: [PATCH 1/7] Do not drop atomic metadata when combining instructions.

---
 llvm/lib/Transforms/Utils/Local.cpp           | 11 ++++-
 .../SimplifyCFG/merge-amdgpu-atomic-md.ll     | 42 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 3dbd605e19c3a..0d2a82d407170 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3303,6 +3303,12 @@ static void combineMetadata(Instruction *K, const Instruction *J,
                             bool DoesKMove, bool AAOnly = false) {
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
   K->getAllMetadataOtherThanDebugLoc(Metadata);
+
+  const unsigned AMDGPUMD[] = {
+      K->getContext().getMDKindID("amdgpu.no.fine.grained.memory"),
+      K->getContext().getMDKindID("amdgpu.no.remote.memory"),
+      K->getContext().getMDKindID("amdgpu.ignore.denormal.mode")};
+
   for (const auto &MD : Metadata) {
     unsigned Kind = MD.first;
     MDNode *JMD = J->getMetadata(Kind);
@@ -3311,7 +3317,10 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     // TODO: Assert that this switch is exhaustive for fixed MD kinds.
     switch (Kind) {
       default:
-        K->setMetadata(Kind, nullptr); // Remove unknown metadata
+        if (K->isAtomic() && (find(AMDGPUMD, Kind) != std::cend(AMDGPUMD)))
+         break; // Preserve AMDGPU atomic metadata.
+        else
+          K->setMetadata(Kind, nullptr); // Remove unknown metadata
         break;
       case LLVMContext::MD_dbg:
         llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
new file mode 100644
index 0000000000000..1cd574e714b43
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;; Test to ensure that AMDGPU atomic related metadata is not dropped when
+;; instructions are sunk. Currently the metadata from the first instruction
+;; is kept, which prevents full loss of optimisation information.
+
+; RUN: opt < %s -passes=simplifycfg -passes=simplifycfg -sink-common-insts -S | FileCheck %s
+
+define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @f(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p.global = addrspacecast ptr %p to ptr addrspace(1)
+  br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  br label %if.end
+
+for.body1:
+  br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+!0 = !{!"float", !1, i64 0}
+!1 = !{!"omnipotent char", !2, i64 0}
+!2 = !{!"Simple C++ TBAA"}

>From 9e7ca22c6ab92921d934864fe7c6d8ecf2f1d40c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 16 May 2025 23:20:37 +0100
Subject: [PATCH 2/7] Fix formatting.

---
 llvm/lib/Transforms/Utils/Local.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 0d2a82d407170..9b71005dff9fd 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3318,7 +3318,7 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     switch (Kind) {
       default:
         if (K->isAtomic() && (find(AMDGPUMD, Kind) != std::cend(AMDGPUMD)))
-         break; // Preserve AMDGPU atomic metadata.
+          break; // Preserve AMDGPU atomic metadata.
         else
           K->setMetadata(Kind, nullptr); // Remove unknown metadata
         break;

>From 8ea68b4628b3128a9690ba673a6805ab9eac795c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 20 May 2025 02:22:05 +0300
Subject: [PATCH 3/7] Clean up test.

---
 llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
index 1cd574e714b43..b4cd01a38d118 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -3,7 +3,7 @@
 ;; instructions are sunk. Currently the metadata from the first instruction
 ;; is kept, which prevents full loss of optimisation information.
 
-; RUN: opt < %s -passes=simplifycfg -passes=simplifycfg -sink-common-insts -S | FileCheck %s
+; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
 
 define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
 ; CHECK-LABEL: define amdgpu_kernel void @f(
@@ -37,6 +37,4 @@ if.end:
   ret void
 }
 
-!0 = !{!"float", !1, i64 0}
-!1 = !{!"omnipotent char", !2, i64 0}
-!2 = !{!"Simple C++ TBAA"}
+!0 = !{}

>From c6cfed671d44755cd4156f2aa1cb23be8955eb57 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 20 May 2025 03:00:39 +0300
Subject: [PATCH 4/7] Handle all cases.

---
 llvm/lib/Transforms/Utils/Local.cpp           | 23 ++++--
 .../SimplifyCFG/merge-amdgpu-atomic-md.ll     | 71 ++++++++++++++++++-
 2 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 9b71005dff9fd..7967b6c3100b9 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3304,11 +3304,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
   K->getAllMetadataOtherThanDebugLoc(Metadata);
 
-  const unsigned AMDGPUMD[] = {
-      K->getContext().getMDKindID("amdgpu.no.fine.grained.memory"),
-      K->getContext().getMDKindID("amdgpu.no.remote.memory"),
-      K->getContext().getMDKindID("amdgpu.ignore.denormal.mode")};
-
+  const auto IsAMDGPUMD = [=](unsigned Kind) {
+    return Kind == K->getContext().getMDKindID("amdgpu.no.fine.grained.memory")
+        || Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory")
+        || Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
+  };
   for (const auto &MD : Metadata) {
     unsigned Kind = MD.first;
     MDNode *JMD = J->getMetadata(Kind);
@@ -3317,7 +3317,7 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     // TODO: Assert that this switch is exhaustive for fixed MD kinds.
     switch (Kind) {
       default:
-        if (K->isAtomic() && (find(AMDGPUMD, Kind) != std::cend(AMDGPUMD)))
+        if (K->isAtomic() && IsAMDGPUMD(Kind))
           break; // Preserve AMDGPU atomic metadata.
         else
           K->setMetadata(Kind, nullptr); // Remove unknown metadata
@@ -3455,6 +3455,17 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     K->setMetadata(LLVMContext::MD_prof,
                    MDNode::getMergedProfMetadata(KProf, JProf, K, J));
   }
+
+  // Preserve AMDGPU atomic metadata from J, if present. K might already be
+  // carrying this but overwriting should cause no issue.
+  if (K->isAtomic()) {
+    if (auto *JMD = J->getMetadata("amdgpu.no.fine.grained.memory"))
+      K->setMetadata("amdgpu.no.fine.grained.memory", JMD);
+    if (auto *JMD = J->getMetadata("amdgpu.no.remote.memory"))
+      K->setMetadata("amdgpu.no.remote.memory", JMD);
+    if (auto *JMD = J->getMetadata("amdgpu.ignore.denormal.mode"))
+      K->setMetadata("amdgpu.ignore.denormal.mode", JMD);
+  }
 }
 
 void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
index b4cd01a38d118..b387fddc99ad6 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -5,8 +5,8 @@
 
 ; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
 
-define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @f(
+define amdgpu_kernel void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @both(
 ; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
@@ -37,4 +37,71 @@ if.end:
   ret void
 }
 
+define amdgpu_kernel void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @from(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p.global = addrspacecast ptr %p to ptr addrspace(1)
+  br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  br label %if.end
+
+for.body1:
+  br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define amdgpu_kernel void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @to(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p.global = addrspacecast ptr %p to ptr addrspace(1)
+  br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+  br label %if.end
+
+for.body1:
+  br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  br label %if.end
+
+if.end:
+  ret void
+}
+
 !0 = !{}
+;.
+; CHECK: [[META0]] = !{}
+;.

>From 401b882a1841ad884df4d60faefddd15f20724a0 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 20 May 2025 11:12:25 +0300
Subject: [PATCH 5/7] Fix formatting.

---
 llvm/lib/Transforms/Utils/Local.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 7967b6c3100b9..13f96981da64d 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3305,9 +3305,10 @@ static void combineMetadata(Instruction *K, const Instruction *J,
   K->getAllMetadataOtherThanDebugLoc(Metadata);
 
   const auto IsAMDGPUMD = [=](unsigned Kind) {
-    return Kind == K->getContext().getMDKindID("amdgpu.no.fine.grained.memory")
-        || Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory")
-        || Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
+    return Kind ==
+               K->getContext().getMDKindID("amdgpu.no.fine.grained.memory") ||
+           Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory") ||
+           Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
   };
   for (const auto &MD : Metadata) {
     unsigned Kind = MD.first;

>From c3096202f05f7142a2060a230ebf9fb162d184c4 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 23 May 2025 23:36:21 +0300
Subject: [PATCH 6/7] Intersection, not union.

---
 llvm/lib/Transforms/Utils/Local.cpp           | 12 +-----
 .../SimplifyCFG/merge-amdgpu-atomic-md.ll     | 42 ++++++++-----------
 2 files changed, 19 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 13f96981da64d..dde0d49e391f6 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3319,7 +3319,7 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     switch (Kind) {
       default:
         if (K->isAtomic() && IsAMDGPUMD(Kind))
-          break; // Preserve AMDGPU atomic metadata.
+          K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
         else
           K->setMetadata(Kind, nullptr); // Remove unknown metadata
         break;
@@ -3457,16 +3457,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
                    MDNode::getMergedProfMetadata(KProf, JProf, K, J));
   }
 
-  // Preserve AMDGPU atomic metadata from J, if present. K might already be
-  // carrying this but overwriting should cause no issue.
-  if (K->isAtomic()) {
-    if (auto *JMD = J->getMetadata("amdgpu.no.fine.grained.memory"))
-      K->setMetadata("amdgpu.no.fine.grained.memory", JMD);
-    if (auto *JMD = J->getMetadata("amdgpu.no.remote.memory"))
-      K->setMetadata("amdgpu.no.remote.memory", JMD);
-    if (auto *JMD = J->getMetadata("amdgpu.ignore.denormal.mode"))
-      K->setMetadata("amdgpu.ignore.denormal.mode", JMD);
-  }
 }
 
 void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
index b387fddc99ad6..56f188146c246 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -5,96 +5,90 @@
 
 ; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
 
-define amdgpu_kernel void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @both(
-; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+define void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {
+; CHECK-LABEL: define void @both(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
 ; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
 ; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
 ; CHECK:       [[IF_END_SINK_SPLIT]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
 ; CHECK-NEXT:    br label %[[IF_END]]
 ; CHECK:       [[IF_END]]:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %p.global = addrspacecast ptr %p to ptr addrspace(1)
   br i1 %pred0, label %for.body, label %for.body1
 
 for.body:
-  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
   br label %if.end
 
 for.body1:
   br i1 %pred1, label %if.then, label %if.end
 
 if.then:
-  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
   br label %if.end
 
 if.end:
   ret void
 }
 
-define amdgpu_kernel void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @from(
-; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+define void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {
+; CHECK-LABEL: define void @from(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
 ; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
 ; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
 ; CHECK:       [[IF_END_SINK_SPLIT]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8
 ; CHECK-NEXT:    br label %[[IF_END]]
 ; CHECK:       [[IF_END]]:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %p.global = addrspacecast ptr %p to ptr addrspace(1)
   br i1 %pred0, label %for.body, label %for.body1
 
 for.body:
-  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
   br label %if.end
 
 for.body1:
   br i1 %pred1, label %if.then, label %if.end
 
 if.then:
-  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+  %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8
   br label %if.end
 
 if.end:
   ret void
 }
 
-define amdgpu_kernel void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @to(
-; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+define void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {
+; CHECK-LABEL: define void @to(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
 ; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
 ; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
 ; CHECK:       [[IF_END_SINK_SPLIT]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8
 ; CHECK-NEXT:    br label %[[IF_END]]
 ; CHECK:       [[IF_END]]:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %p.global = addrspacecast ptr %p to ptr addrspace(1)
   br i1 %pred0, label %for.body, label %for.body1
 
 for.body:
-  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+  %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8
   br label %if.end
 
 for.body1:
   br i1 %pred1, label %if.then, label %if.end
 
 if.then:
-  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
   br label %if.end
 
 if.end:

>From c724475ac9cd8fb77a1eb44647059d3be57aa5d4 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 23 May 2025 23:41:23 +0300
Subject: [PATCH 7/7] Fix stray whitespace.

---
 llvm/lib/Transforms/Utils/Local.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index dde0d49e391f6..a55e0021fa9c1 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3456,7 +3456,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     K->setMetadata(LLVMContext::MD_prof,
                    MDNode::getMergedProfMetadata(KProf, JProf, K, J));
   }
-
 }
 
 void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,



More information about the llvm-commits mailing list