[llvm] [AMDGPU] Ignore wavefront barrier latency during scheduling DAG mutation (PR #168500)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 00:23:07 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Carl Ritson (perlfu)
<details>
<summary>Changes</summary>
Do not add latency for wavefront and singlethread scope fences during barrier latency DAG mutation.
These scopes do not typically introduce any latency, and adjusting schedules based on them significantly impacts latency hiding.
---
Patch is 23.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168500.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp (+17-5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h (+4-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir (+277-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
index 30a1f05a8a390..2e586ea207af5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
@@ -27,8 +27,17 @@ using namespace llvm;
namespace {
class BarrierLatency : public ScheduleDAGMutation {
+private:
+ SmallSet<SyncScope::ID, 4> IgnoredScopes;
+
public:
- BarrierLatency() = default;
+ BarrierLatency(MachineFunction *MF) {
+ LLVMContext &Context = MF->getFunction().getContext();
+ IgnoredScopes.insert(SyncScope::SingleThread);
+ IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront"));
+ IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront-one-as"));
+ IgnoredScopes.insert(Context.getOrInsertSyncScopeID("singlethread-one-as"));
+ }
void apply(ScheduleDAGInstrs *DAG) override;
};
@@ -40,8 +49,11 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
continue;
// Update latency on barrier edges of ATOMIC_FENCE.
- // We don't consider the scope of the fence or type of instruction
- // involved in the barrier edge.
+ // Ignore scopes not expected to have any latency.
+ SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
+ if (IgnoredScopes.contains(SSID))
+ continue;
+
for (SDep &PredDep : SU.Preds) {
if (!PredDep.isBarrier())
continue;
@@ -68,6 +80,6 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
} // end namespace
std::unique_ptr<ScheduleDAGMutation>
-llvm::createAMDGPUBarrierLatencyDAGMutation() {
- return std::make_unique<BarrierLatency>();
+llvm::createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF) {
+ return std::make_unique<BarrierLatency>(MF);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
index c23f0b99fe822..547cd2a11f7df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
@@ -14,7 +14,10 @@
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation> createAMDGPUBarrierLatencyDAGMutation();
+class MachineFunction;
+
+std::unique_ptr<ScheduleDAGMutation>
+createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF);
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5ff16e29bbbb1..0346580ffa684 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -647,7 +647,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
- DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
return DAG;
}
@@ -668,7 +668,7 @@ createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C) {
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
- DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
return DAG;
}
@@ -1209,7 +1209,7 @@ GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
EnableVOPD)
DAG->addMutation(createVOPDPairingMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
- DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
return DAG;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
index 93f7bcc478737..30cc241b55271 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
@@ -1,16 +1,32 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=postmisched -o - %s | FileCheck %s
+# Ensure syncscope IDs are defined in the expected order
+# SSID 2 = workgroup
+# SSID 3 = wavefront
+--- |
+ define amdgpu_cs void @test_workgroup() {
+ fence syncscope("workgroup") acq_rel
+ fence syncscope("wavefront") acq_rel
+ ret void
+ }
+ define amdgpu_cs void @test_wavefront() {
+ fence syncscope("workgroup") acq_rel
+ fence syncscope("wavefront") acq_rel
+ ret void
+ }
+...
+
# Ensure WMMA operations stay before the final atomic fence and barrier group.
# This allows the latency of the WMMA operations to be hidden by barrier wait.
---
-name: test
+name: test_workgroup
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr12, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11_vgpr12, $vgpr13_vgpr14_vgpr15_vgpr16, $vgpr17_vgpr18_vgpr19_vgpr20, $vgpr21_vgpr22_vgpr23_vgpr24, $vgpr25_vgpr26_vgpr27_vgpr28, $vgpr29_vgpr30_vgpr31_vgpr32
- ; CHECK-LABEL: name: test
+ ; CHECK-LABEL: name: test_workgroup
; CHECK: liveins: $sgpr0, $sgpr12, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11_vgpr12, $vgpr13_vgpr14_vgpr15_vgpr16, $vgpr17_vgpr18_vgpr19_vgpr20, $vgpr21_vgpr22_vgpr23_vgpr24, $vgpr25_vgpr26_vgpr27_vgpr28, $vgpr29_vgpr30_vgpr31_vgpr32
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ATOMIC_FENCE 5, 2
@@ -81,3 +97,262 @@ body: |
ATOMIC_FENCE 4, 2
...
+
+# Ensure VALU operations are not unduly redistributed between wavefront fences
+# causing a loss of latency hiding.
+---
+name: test_wavefront
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr1, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr14
+ ; CHECK-LABEL: name: test_wavefront
+ ; CHECK: liveins: $vgpr1, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 2
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: BUNDLE implicit killed $vgpr9, implicit killed $vgpr10, implicit killed $vgpr12, implicit $exec, implicit killed $vgpr8, implicit killed $vgpr11, implicit killed $vgpr14 {
+ ; CHECK-NEXT: DS_WRITE2_B32_gfx9 killed $vgpr9, killed $vgpr10, killed $vgpr12, 0, 16, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE2ST64_B32_gfx9 killed $vgpr8, killed $vgpr11, killed $vgpr14, 0, 4, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 2
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr34, implicit-def $vgpr35, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr34 = DS_READ_B32_gfx9 $vgpr7, 2096, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr35 = DS_READ_B32_gfx9 $vgpr6, 768, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr36, implicit-def $vgpr37, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr36 = DS_READ_B32_gfx9 $vgpr7, 2100, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr37 = DS_READ_B32_gfx9 $vgpr6, 832, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr38, implicit-def $vgpr39, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr38 = DS_READ_B32_gfx9 $vgpr7, 2104, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr39 = DS_READ_B32_gfx9 $vgpr6, 896, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr40, implicit-def $vgpr41, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr40 = DS_READ_B32_gfx9 $vgpr7, 2108, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr41 = DS_READ_B32_gfx9 $vgpr6, 960, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr42, implicit-def $vgpr43, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr42 = DS_READ_B32_gfx9 $vgpr7, 2112, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr43 = DS_READ_B32_gfx9 $vgpr6, 1024, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr44, implicit-def $vgpr45, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr44 = DS_READ_B32_gfx9 $vgpr7, 2116, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr45 = DS_READ_B32_gfx9 $vgpr6, 1088, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr46, implicit-def $vgpr47, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr46 = DS_READ_B32_gfx9 $vgpr7, 2120, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr47 = DS_READ_B32_gfx9 $vgpr6, 1152, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr10, implicit-def $vgpr11, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr10 = DS_READ_B32_gfx9 $vgpr7, 2124, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr11 = DS_READ_B32_gfx9 $vgpr6, 1216, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr12, implicit-def $vgpr13, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr12 = DS_READ_B32_gfx9 $vgpr7, 2128, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr13 = DS_READ_B32_gfx9 $vgpr6, 1280, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr14, implicit-def $vgpr15, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr14 = DS_READ_B32_gfx9 $vgpr7, 2132, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr15 = DS_READ_B32_gfx9 $vgpr6, 1344, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr34, killed $vgpr35, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr16, implicit-def $vgpr17, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr16 = DS_READ_B32_gfx9 $vgpr7, 2136, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr17 = DS_READ_B32_gfx9 $vgpr6, 1408, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr18, implicit-def $vgpr19, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr18 = DS_READ_B32_gfx9 $vgpr7, 2140, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr19 = DS_READ_B32_gfx9 $vgpr6, 1472, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr36, killed $vgpr37, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr20, implicit-def $vgpr21, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr20 = DS_READ_B32_gfx9 $vgpr7, 2144, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr21 = DS_READ_B32_gfx9 $vgpr6, 1536, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr22, implicit-def $vgpr23, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr22 = DS_READ_B32_gfx9 $vgpr7, 2148, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr23 = DS_READ_B32_gfx9 $vgpr6, 1600, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr38, killed $vgpr39, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr24, implicit-def $vgpr25, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr24 = DS_READ_B32_gfx9 $vgpr7, 2152, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr25 = DS_READ_B32_gfx9 $vgpr6, 1664, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr26, implicit-def $vgpr27, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr26 = DS_READ_B32_gfx9 $vgpr7, 2156, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr27 = DS_READ_B32_gfx9 $vgpr6, 1728, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr40, killed $vgpr41, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr28, implicit-def $vgpr29, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr28 = DS_READ_B32_gfx9 $vgpr7, 2160, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr29 = DS_READ_B32_gfx9 $vgpr6, 1792, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr30, implicit-def $vgpr31, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr30 = DS_READ_B32_gfx9 $vgpr7, 2164, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr31 = DS_READ_B32_gfx9 $vgpr6, 1856, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr42, killed $vgpr43, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr32, implicit-def $vgpr33, implicit killed $vgpr7, implicit $exec, implicit killed $vgpr6 {
+ ; CHECK-NEXT: $vgpr32 = DS_READ_B32_gfx9 killed $vgpr7, 2168, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr33 = DS_READ_B32_gfx9 killed $vgpr6, 1920, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr44, killed $vgpr45, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr46, killed $vgpr47, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr10, killed $vgpr11, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr12, killed $vgpr13, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr14, killed $vgpr15, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr16, killed $vgpr17, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr18, killed $vgpr19, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr20, killed $vgpr21, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr22, killed $vgpr23, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr24, killed $vgpr25, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr26, killed $vgpr27, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr28, killed $vgpr29, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr30, killed $vgpr31, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr32, killed $vgpr33, killed $vgpr1, implicit $mode, implicit $exec
+ ATOMIC_FENCE 6, 3
+ ATOMIC_FENCE 6, 2
+ S_BARRIER
+ BUNDLE implicit $vgpr9, implicit killed $vgpr10, implicit killed $vgpr12, implicit $exec, implicit $vgpr8, implicit killed $vgpr11, implicit killed $vgpr14 {
+ DS_WRITE2_B32_gfx9 $vgpr9, killed $vgpr10, killed $vgpr12, 0, 16, 0, implicit $exec
+ DS_WRITE2ST64_B32_gfx9 $vgpr8, killed $vgpr11, killed $vgpr14, 0, 4, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 2
+ S_BARRIER
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr34, implicit-def $vgpr35, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr34 = DS_READ_B32_gfx9 $vgpr7, 2096, 0, implicit $exec
+ $vgpr35 = DS_READ_B32_gfx9 $vgpr6, 768, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr36, implicit-def $vgpr37, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr36 = DS_READ_B32_gfx9 $vgpr7, 2100, 0, implicit $exec
+ $vgpr37 = DS_READ_B32_gfx9 $vgpr6, 832, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr38, implicit-def $vgpr39, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr38 = DS_READ_B32_gfx9 $vgpr7, 2104, 0, implicit $exec
+ $vgpr39 = DS_READ_B32_gfx9 $vgpr6, 896, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr40, implicit-def $vgpr41, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr40 = DS_READ_B32_gfx9 $vgpr7, 2108, 0, implicit $exec
+ $vgpr41 = DS_READ_B32_gfx9 $vgpr6, 960, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr42, implicit-def $vgpr43, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr42 = DS_READ_B32_gfx9 $vgpr7, 2112, 0, implicit $exec
+ $vgpr43 = DS_READ_B32_gfx9 $vgpr6, 1024, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr44, implicit-def $vgpr45, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr44 = DS_READ_B32_gfx9 $vgpr7, 2116, 0, implicit $exec
+ $vgpr45 = DS_READ_B32_gfx9 $vgpr6, 1088, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr46, implicit-def $vgpr47, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr46 = DS_READ_B32_gfx9 $vgpr7, 2120, 0, implicit $exec
+ $vgpr47 = DS_READ_B32_gfx9 $vgpr6, 1152, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr10, implicit-def $vgpr11, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr10 = DS_READ_B32_gfx9 $vgpr7, 2124, 0, implicit $exec
+ $vgpr11 = DS_READ_B32_gfx9 $vgpr6, 1216, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr12, implicit-def $vgpr13, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr12 = DS_READ_B32_gfx9 $vgpr7, 2128, 0, implicit $exec
+ $vgpr13 = DS_READ_B32_gfx9 $vgpr6, 1280, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr14, implicit-def $vgpr15, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr14 = DS_READ_B32_gfx9 $vgpr7, 2132, 0, implicit $exec
+ $vgpr15 = DS_READ_B32_gfx9 $vgpr6, 1344, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr16, implicit-def $vgpr17, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr16 = DS_READ_B32_gfx9 $vgpr7, 2136, 0, implicit $exec
+ $vgpr17 = DS_READ_B32_gfx9 $vgpr6, 1408, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr18, implicit-def $vgpr19, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr18 = DS_READ_B32_gfx9 $vgpr7, 2140, 0, implicit $exec
+ $vgpr19 = DS_READ_B32_gfx9 $vgpr6, 1472, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr20, implicit-def $vgpr21, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr20 = DS_READ_B32_gfx9 $vgpr7, 2144, 0, implicit $exec
+ $vgpr21 = DS_READ_B32_gfx9 $vgpr6, 1536, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr22, implicit-def $vgpr23, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr22 = DS_READ_B32_gfx9 $vgpr7, 2148, 0, implicit $exec
+ $vgpr23 = DS_READ_B32_gfx9 $vgpr6, 1600, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr24, implicit-def $vgpr25, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr24 = DS_READ_B32_gfx9 $vgpr7, 2152, 0, implicit $exec
+ $vgpr25 = DS_READ_B32_gfx9 $vgpr6, 1664, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr26, implicit-def $vgpr27, implicit $vgpr7, implicit $exec, implicit $vgpr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/168500
More information about the llvm-commits
mailing list