[llvm] [llvm][opt][Transforms] Preserve AMDGPU atomic metadata (PR #140314)
Alex Voicu via llvm-commits
llvm-commits at lists.llvm.org
Fri May 23 13:41:43 PDT 2025
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/140314
>From 31ceadae74d5df05d9baf501eaab27cc853bc806 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 16 May 2025 23:12:23 +0100
Subject: [PATCH 1/7] Do not drop atomic metadata when combining instructions.
---
llvm/lib/Transforms/Utils/Local.cpp | 11 ++++-
.../SimplifyCFG/merge-amdgpu-atomic-md.ll | 42 +++++++++++++++++++
2 files changed, 52 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 3dbd605e19c3a..0d2a82d407170 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3303,6 +3303,12 @@ static void combineMetadata(Instruction *K, const Instruction *J,
bool DoesKMove, bool AAOnly = false) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->getAllMetadataOtherThanDebugLoc(Metadata);
+
+ const unsigned AMDGPUMD[] = {
+ K->getContext().getMDKindID("amdgpu.no.fine.grained.memory"),
+ K->getContext().getMDKindID("amdgpu.no.remote.memory"),
+ K->getContext().getMDKindID("amdgpu.ignore.denormal.mode")};
+
for (const auto &MD : Metadata) {
unsigned Kind = MD.first;
MDNode *JMD = J->getMetadata(Kind);
@@ -3311,7 +3317,10 @@ static void combineMetadata(Instruction *K, const Instruction *J,
// TODO: Assert that this switch is exhaustive for fixed MD kinds.
switch (Kind) {
default:
- K->setMetadata(Kind, nullptr); // Remove unknown metadata
+ if (K->isAtomic() && (find(AMDGPUMD, Kind) != std::cend(AMDGPUMD)))
+ break; // Preserve AMDGPU atomic metadata.
+ else
+ K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
new file mode 100644
index 0000000000000..1cd574e714b43
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;; Test to ensure that AMDGPU atomic-related metadata is not dropped when
+;; instructions are sunk. Currently the metadata from the first instruction
+;; is kept, which prevents full loss of optimisation information.
+
+; RUN: opt < %s -passes=simplifycfg -passes=simplifycfg -sink-common-insts -S | FileCheck %s
+
+define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @f(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %p.global = addrspacecast ptr %p to ptr addrspace(1)
+ br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+ %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ br label %if.end
+
+for.body1:
+ br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+ %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+!0 = !{!"float", !1, i64 0}
+!1 = !{!"omnipotent char", !2, i64 0}
+!2 = !{!"Simple C++ TBAA"}
>From 9e7ca22c6ab92921d934864fe7c6d8ecf2f1d40c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 16 May 2025 23:20:37 +0100
Subject: [PATCH 2/7] Fix formatting.
---
llvm/lib/Transforms/Utils/Local.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 0d2a82d407170..9b71005dff9fd 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3318,7 +3318,7 @@ static void combineMetadata(Instruction *K, const Instruction *J,
switch (Kind) {
default:
if (K->isAtomic() && (find(AMDGPUMD, Kind) != std::cend(AMDGPUMD)))
- break; // Preserve AMDGPU atomic metadata.
+ break; // Preserve AMDGPU atomic metadata.
else
K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
>From 8ea68b4628b3128a9690ba673a6805ab9eac795c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 20 May 2025 02:22:05 +0300
Subject: [PATCH 3/7] Clean up test.
---
llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
index 1cd574e714b43..b4cd01a38d118 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -3,7 +3,7 @@
;; instructions are sunk. Currently the metadata from the first instruction
;; is kept, which prevents full loss of optimisation information.
-; RUN: opt < %s -passes=simplifycfg -passes=simplifycfg -sink-common-insts -S | FileCheck %s
+; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
; CHECK-LABEL: define amdgpu_kernel void @f(
@@ -37,6 +37,4 @@ if.end:
ret void
}
-!0 = !{!"float", !1, i64 0}
-!1 = !{!"omnipotent char", !2, i64 0}
-!2 = !{!"Simple C++ TBAA"}
+!0 = !{}
>From c6cfed671d44755cd4156f2aa1cb23be8955eb57 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 20 May 2025 03:00:39 +0300
Subject: [PATCH 4/7] Handle all cases.
---
llvm/lib/Transforms/Utils/Local.cpp | 23 ++++--
.../SimplifyCFG/merge-amdgpu-atomic-md.ll | 71 ++++++++++++++++++-
2 files changed, 86 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 9b71005dff9fd..7967b6c3100b9 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3304,11 +3304,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->getAllMetadataOtherThanDebugLoc(Metadata);
- const unsigned AMDGPUMD[] = {
- K->getContext().getMDKindID("amdgpu.no.fine.grained.memory"),
- K->getContext().getMDKindID("amdgpu.no.remote.memory"),
- K->getContext().getMDKindID("amdgpu.ignore.denormal.mode")};
-
+ const auto IsAMDGPUMD = [=](unsigned Kind) {
+ return Kind == K->getContext().getMDKindID("amdgpu.no.fine.grained.memory")
+ || Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory")
+ || Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
+ };
for (const auto &MD : Metadata) {
unsigned Kind = MD.first;
MDNode *JMD = J->getMetadata(Kind);
@@ -3317,7 +3317,7 @@ static void combineMetadata(Instruction *K, const Instruction *J,
// TODO: Assert that this switch is exhaustive for fixed MD kinds.
switch (Kind) {
default:
- if (K->isAtomic() && (find(AMDGPUMD, Kind) != std::cend(AMDGPUMD)))
+ if (K->isAtomic() && IsAMDGPUMD(Kind))
break; // Preserve AMDGPU atomic metadata.
else
K->setMetadata(Kind, nullptr); // Remove unknown metadata
@@ -3455,6 +3455,17 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_prof,
MDNode::getMergedProfMetadata(KProf, JProf, K, J));
}
+
+ // Preserve AMDGPU atomic metadata from J, if present. K might already be
+ // carrying this but overwriting should cause no issue.
+ if (K->isAtomic()) {
+ if (auto *JMD = J->getMetadata("amdgpu.no.fine.grained.memory"))
+ K->setMetadata("amdgpu.no.fine.grained.memory", JMD);
+ if (auto *JMD = J->getMetadata("amdgpu.no.remote.memory"))
+ K->setMetadata("amdgpu.no.remote.memory", JMD);
+ if (auto *JMD = J->getMetadata("amdgpu.ignore.denormal.mode"))
+ K->setMetadata("amdgpu.ignore.denormal.mode", JMD);
+ }
}
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
index b4cd01a38d118..b387fddc99ad6 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -5,8 +5,8 @@
; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
-define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @f(
+define amdgpu_kernel void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @both(
; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
@@ -37,4 +37,71 @@ if.end:
ret void
}
+define amdgpu_kernel void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @from(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %p.global = addrspacecast ptr %p to ptr addrspace(1)
+ br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+ %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ br label %if.end
+
+for.body1:
+ br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+ %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+define amdgpu_kernel void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @to(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %p.global = addrspacecast ptr %p to ptr addrspace(1)
+ br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+ %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+ br label %if.end
+
+for.body1:
+ br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+ %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ br label %if.end
+
+if.end:
+ ret void
+}
+
!0 = !{}
+;.
+; CHECK: [[META0]] = !{}
+;.
>From 401b882a1841ad884df4d60faefddd15f20724a0 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 20 May 2025 11:12:25 +0300
Subject: [PATCH 5/7] Fix formatting.
---
llvm/lib/Transforms/Utils/Local.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 7967b6c3100b9..13f96981da64d 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3305,9 +3305,10 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->getAllMetadataOtherThanDebugLoc(Metadata);
const auto IsAMDGPUMD = [=](unsigned Kind) {
- return Kind == K->getContext().getMDKindID("amdgpu.no.fine.grained.memory")
- || Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory")
- || Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
+ return Kind ==
+ K->getContext().getMDKindID("amdgpu.no.fine.grained.memory") ||
+ Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory") ||
+ Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
};
for (const auto &MD : Metadata) {
unsigned Kind = MD.first;
>From c3096202f05f7142a2060a230ebf9fb162d184c4 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 23 May 2025 23:36:21 +0300
Subject: [PATCH 6/7] Intersection, not union.
---
llvm/lib/Transforms/Utils/Local.cpp | 12 +-----
.../SimplifyCFG/merge-amdgpu-atomic-md.ll | 42 ++++++++-----------
2 files changed, 19 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 13f96981da64d..dde0d49e391f6 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3319,7 +3319,7 @@ static void combineMetadata(Instruction *K, const Instruction *J,
switch (Kind) {
default:
if (K->isAtomic() && IsAMDGPUMD(Kind))
- break; // Preserve AMDGPU atomic metadata.
+ K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
else
K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
@@ -3457,16 +3457,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
MDNode::getMergedProfMetadata(KProf, JProf, K, J));
}
- // Preserve AMDGPU atomic metadata from J, if present. K might already be
- // carrying this but overwriting should cause no issue.
- if (K->isAtomic()) {
- if (auto *JMD = J->getMetadata("amdgpu.no.fine.grained.memory"))
- K->setMetadata("amdgpu.no.fine.grained.memory", JMD);
- if (auto *JMD = J->getMetadata("amdgpu.no.remote.memory"))
- K->setMetadata("amdgpu.no.remote.memory", JMD);
- if (auto *JMD = J->getMetadata("amdgpu.ignore.denormal.mode"))
- K->setMetadata("amdgpu.ignore.denormal.mode", JMD);
- }
}
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
index b387fddc99ad6..56f188146c246 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -5,96 +5,90 @@
; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
-define amdgpu_kernel void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @both(
-; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+define void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {
+; CHECK-LABEL: define void @both(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_END_SINK_SPLIT]]:
-; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
- %p.global = addrspacecast ptr %p to ptr addrspace(1)
br i1 %pred0, label %for.body, label %for.body1
for.body:
- %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end
for.body1:
br i1 %pred1, label %if.then, label %if.end
if.then:
- %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end
if.end:
ret void
}
-define amdgpu_kernel void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @from(
-; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+define void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {
+; CHECK-LABEL: define void @from(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_END_SINK_SPLIT]]:
-; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
- %p.global = addrspacecast ptr %p to ptr addrspace(1)
br i1 %pred0, label %for.body, label %for.body1
for.body:
- %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end
for.body1:
br i1 %pred1, label %if.then, label %if.end
if.then:
- %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+ %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8
br label %if.end
if.end:
ret void
}
-define amdgpu_kernel void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
-; CHECK-LABEL: define amdgpu_kernel void @to(
-; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+define void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {
+; CHECK-LABEL: define void @to(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_END_SINK_SPLIT]]:
-; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
- %p.global = addrspacecast ptr %p to ptr addrspace(1)
br i1 %pred0, label %for.body, label %for.body1
for.body:
- %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
+ %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8
br label %if.end
for.body1:
br i1 %pred1, label %if.then, label %if.end
if.then:
- %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+ %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end
if.end:
>From c724475ac9cd8fb77a1eb44647059d3be57aa5d4 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 23 May 2025 23:41:23 +0300
Subject: [PATCH 7/7] Fix stray whitespace.
---
llvm/lib/Transforms/Utils/Local.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index dde0d49e391f6..a55e0021fa9c1 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3456,7 +3456,6 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_prof,
MDNode::getMergedProfMetadata(KProf, JProf, K, J));
}
-
}
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
More information about the llvm-commits mailing list