[llvm] 711a295 - [AMDGPU] Ignore wavefront barrier latency during scheduling DAG mutation (#168500)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 00:49:18 PST 2025
Author: Carl Ritson
Date: 2025-11-19T17:49:14+09:00
New Revision: 711a2954799e597c71b86aed8c93167765a5255f
URL: https://github.com/llvm/llvm-project/commit/711a2954799e597c71b86aed8c93167765a5255f
DIFF: https://github.com/llvm/llvm-project/commit/711a2954799e597c71b86aed8c93167765a5255f.diff
LOG: [AMDGPU] Ignore wavefront barrier latency during scheduling DAG mutation (#168500)
Do not add latency for wavefront and singlethread scope fences during
barrier latency DAG mutation.
These scopes do not typically introduce any latency and adjusting
schedules based on them significantly impacts latency hiding.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
index 30a1f05a8a390..2e586ea207af5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
@@ -27,8 +27,17 @@ using namespace llvm;
namespace {
class BarrierLatency : public ScheduleDAGMutation {
+private:
+ SmallSet<SyncScope::ID, 4> IgnoredScopes;
+
public:
- BarrierLatency() = default;
+ BarrierLatency(MachineFunction *MF) {
+ LLVMContext &Context = MF->getFunction().getContext();
+ IgnoredScopes.insert(SyncScope::SingleThread);
+ IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront"));
+ IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront-one-as"));
+ IgnoredScopes.insert(Context.getOrInsertSyncScopeID("singlethread-one-as"));
+ }
void apply(ScheduleDAGInstrs *DAG) override;
};
@@ -40,8 +49,11 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
continue;
// Update latency on barrier edges of ATOMIC_FENCE.
- // We don't consider the scope of the fence or type of instruction
- // involved in the barrier edge.
+ // Ignore scopes not expected to have any latency.
+ SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
+ if (IgnoredScopes.contains(SSID))
+ continue;
+
for (SDep &PredDep : SU.Preds) {
if (!PredDep.isBarrier())
continue;
@@ -68,6 +80,6 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
} // end namespace
std::unique_ptr<ScheduleDAGMutation>
-llvm::createAMDGPUBarrierLatencyDAGMutation() {
- return std::make_unique<BarrierLatency>();
+llvm::createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF) {
+ return std::make_unique<BarrierLatency>(MF);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
index c23f0b99fe822..547cd2a11f7df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
@@ -14,7 +14,10 @@
namespace llvm {
-std::unique_ptr<ScheduleDAGMutation> createAMDGPUBarrierLatencyDAGMutation();
+class MachineFunction;
+
+std::unique_ptr<ScheduleDAGMutation>
+createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF);
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5ff16e29bbbb1..0346580ffa684 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -647,7 +647,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
- DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
return DAG;
}
@@ -668,7 +668,7 @@ createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C) {
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
- DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
return DAG;
}
@@ -1209,7 +1209,7 @@ GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
EnableVOPD)
DAG->addMutation(createVOPDPairingMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
- DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
return DAG;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
index 93f7bcc478737..30cc241b55271 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier-latency.mir
@@ -1,16 +1,32 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=postmisched -o - %s | FileCheck %s
+# Ensure syncscope IDs are defined in the expected order
+# SSID 2 = workgroup
+# SSID 3 = wavefront
+--- |
+ define amdgpu_cs void @test_workgroup() {
+ fence syncscope("workgroup") acq_rel
+ fence syncscope("wavefront") acq_rel
+ ret void
+ }
+ define amdgpu_cs void @test_wavefront() {
+ fence syncscope("workgroup") acq_rel
+ fence syncscope("wavefront") acq_rel
+ ret void
+ }
+...
+
# Ensure WMMA operations stay before the final atomic fence and barrier group.
# This allows the latency of the WMMA operations to be hidden by barrier wait.
---
-name: test
+name: test_workgroup
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr12, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11_vgpr12, $vgpr13_vgpr14_vgpr15_vgpr16, $vgpr17_vgpr18_vgpr19_vgpr20, $vgpr21_vgpr22_vgpr23_vgpr24, $vgpr25_vgpr26_vgpr27_vgpr28, $vgpr29_vgpr30_vgpr31_vgpr32
- ; CHECK-LABEL: name: test
+ ; CHECK-LABEL: name: test_workgroup
; CHECK: liveins: $sgpr0, $sgpr12, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11_vgpr12, $vgpr13_vgpr14_vgpr15_vgpr16, $vgpr17_vgpr18_vgpr19_vgpr20, $vgpr21_vgpr22_vgpr23_vgpr24, $vgpr25_vgpr26_vgpr27_vgpr28, $vgpr29_vgpr30_vgpr31_vgpr32
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ATOMIC_FENCE 5, 2
@@ -81,3 +97,262 @@ body: |
ATOMIC_FENCE 4, 2
...
+
+# Ensure VALU operations are not unduly redistributed between wavefront fences
+# causing a loss of latency hiding.
+---
+name: test_wavefront
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr1, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr14
+ ; CHECK-LABEL: name: test_wavefront
+ ; CHECK: liveins: $vgpr1, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 2
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: BUNDLE implicit killed $vgpr9, implicit killed $vgpr10, implicit killed $vgpr12, implicit $exec, implicit killed $vgpr8, implicit killed $vgpr11, implicit killed $vgpr14 {
+ ; CHECK-NEXT: DS_WRITE2_B32_gfx9 killed $vgpr9, killed $vgpr10, killed $vgpr12, 0, 16, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE2ST64_B32_gfx9 killed $vgpr8, killed $vgpr11, killed $vgpr14, 0, 4, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 2
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr34, implicit-def $vgpr35, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr34 = DS_READ_B32_gfx9 $vgpr7, 2096, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr35 = DS_READ_B32_gfx9 $vgpr6, 768, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr36, implicit-def $vgpr37, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr36 = DS_READ_B32_gfx9 $vgpr7, 2100, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr37 = DS_READ_B32_gfx9 $vgpr6, 832, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr38, implicit-def $vgpr39, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr38 = DS_READ_B32_gfx9 $vgpr7, 2104, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr39 = DS_READ_B32_gfx9 $vgpr6, 896, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr40, implicit-def $vgpr41, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr40 = DS_READ_B32_gfx9 $vgpr7, 2108, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr41 = DS_READ_B32_gfx9 $vgpr6, 960, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr42, implicit-def $vgpr43, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr42 = DS_READ_B32_gfx9 $vgpr7, 2112, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr43 = DS_READ_B32_gfx9 $vgpr6, 1024, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr44, implicit-def $vgpr45, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr44 = DS_READ_B32_gfx9 $vgpr7, 2116, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr45 = DS_READ_B32_gfx9 $vgpr6, 1088, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr46, implicit-def $vgpr47, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr46 = DS_READ_B32_gfx9 $vgpr7, 2120, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr47 = DS_READ_B32_gfx9 $vgpr6, 1152, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr10, implicit-def $vgpr11, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr10 = DS_READ_B32_gfx9 $vgpr7, 2124, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr11 = DS_READ_B32_gfx9 $vgpr6, 1216, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr12, implicit-def $vgpr13, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr12 = DS_READ_B32_gfx9 $vgpr7, 2128, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr13 = DS_READ_B32_gfx9 $vgpr6, 1280, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr14, implicit-def $vgpr15, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr14 = DS_READ_B32_gfx9 $vgpr7, 2132, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr15 = DS_READ_B32_gfx9 $vgpr6, 1344, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr34, killed $vgpr35, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr16, implicit-def $vgpr17, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr16 = DS_READ_B32_gfx9 $vgpr7, 2136, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr17 = DS_READ_B32_gfx9 $vgpr6, 1408, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr18, implicit-def $vgpr19, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr18 = DS_READ_B32_gfx9 $vgpr7, 2140, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr19 = DS_READ_B32_gfx9 $vgpr6, 1472, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr36, killed $vgpr37, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr20, implicit-def $vgpr21, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr20 = DS_READ_B32_gfx9 $vgpr7, 2144, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr21 = DS_READ_B32_gfx9 $vgpr6, 1536, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr22, implicit-def $vgpr23, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr22 = DS_READ_B32_gfx9 $vgpr7, 2148, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr23 = DS_READ_B32_gfx9 $vgpr6, 1600, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr38, killed $vgpr39, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr24, implicit-def $vgpr25, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr24 = DS_READ_B32_gfx9 $vgpr7, 2152, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr25 = DS_READ_B32_gfx9 $vgpr6, 1664, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr26, implicit-def $vgpr27, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr26 = DS_READ_B32_gfx9 $vgpr7, 2156, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr27 = DS_READ_B32_gfx9 $vgpr6, 1728, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr40, killed $vgpr41, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr28, implicit-def $vgpr29, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr28 = DS_READ_B32_gfx9 $vgpr7, 2160, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr29 = DS_READ_B32_gfx9 $vgpr6, 1792, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr30, implicit-def $vgpr31, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ ; CHECK-NEXT: $vgpr30 = DS_READ_B32_gfx9 $vgpr7, 2164, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr31 = DS_READ_B32_gfx9 $vgpr6, 1856, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: ATOMIC_FENCE 6, 3
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr42, killed $vgpr43, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr32, implicit-def $vgpr33, implicit killed $vgpr7, implicit $exec, implicit killed $vgpr6 {
+ ; CHECK-NEXT: $vgpr32 = DS_READ_B32_gfx9 killed $vgpr7, 2168, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr33 = DS_READ_B32_gfx9 killed $vgpr6, 1920, 0, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr44, killed $vgpr45, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr46, killed $vgpr47, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr10, killed $vgpr11, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr12, killed $vgpr13, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr14, killed $vgpr15, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr16, killed $vgpr17, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr18, killed $vgpr19, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr20, killed $vgpr21, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr22, killed $vgpr23, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr24, killed $vgpr25, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr26, killed $vgpr27, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr28, killed $vgpr29, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr30, killed $vgpr31, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr32, killed $vgpr33, killed $vgpr1, implicit $mode, implicit $exec
+ ATOMIC_FENCE 6, 3
+ ATOMIC_FENCE 6, 2
+ S_BARRIER
+ BUNDLE implicit $vgpr9, implicit killed $vgpr10, implicit killed $vgpr12, implicit $exec, implicit $vgpr8, implicit killed $vgpr11, implicit killed $vgpr14 {
+ DS_WRITE2_B32_gfx9 $vgpr9, killed $vgpr10, killed $vgpr12, 0, 16, 0, implicit $exec
+ DS_WRITE2ST64_B32_gfx9 $vgpr8, killed $vgpr11, killed $vgpr14, 0, 4, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 2
+ S_BARRIER
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr34, implicit-def $vgpr35, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr34 = DS_READ_B32_gfx9 $vgpr7, 2096, 0, implicit $exec
+ $vgpr35 = DS_READ_B32_gfx9 $vgpr6, 768, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr36, implicit-def $vgpr37, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr36 = DS_READ_B32_gfx9 $vgpr7, 2100, 0, implicit $exec
+ $vgpr37 = DS_READ_B32_gfx9 $vgpr6, 832, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr38, implicit-def $vgpr39, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr38 = DS_READ_B32_gfx9 $vgpr7, 2104, 0, implicit $exec
+ $vgpr39 = DS_READ_B32_gfx9 $vgpr6, 896, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr40, implicit-def $vgpr41, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr40 = DS_READ_B32_gfx9 $vgpr7, 2108, 0, implicit $exec
+ $vgpr41 = DS_READ_B32_gfx9 $vgpr6, 960, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr42, implicit-def $vgpr43, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr42 = DS_READ_B32_gfx9 $vgpr7, 2112, 0, implicit $exec
+ $vgpr43 = DS_READ_B32_gfx9 $vgpr6, 1024, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr44, implicit-def $vgpr45, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr44 = DS_READ_B32_gfx9 $vgpr7, 2116, 0, implicit $exec
+ $vgpr45 = DS_READ_B32_gfx9 $vgpr6, 1088, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr46, implicit-def $vgpr47, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr46 = DS_READ_B32_gfx9 $vgpr7, 2120, 0, implicit $exec
+ $vgpr47 = DS_READ_B32_gfx9 $vgpr6, 1152, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr10, implicit-def $vgpr11, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr10 = DS_READ_B32_gfx9 $vgpr7, 2124, 0, implicit $exec
+ $vgpr11 = DS_READ_B32_gfx9 $vgpr6, 1216, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr12, implicit-def $vgpr13, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr12 = DS_READ_B32_gfx9 $vgpr7, 2128, 0, implicit $exec
+ $vgpr13 = DS_READ_B32_gfx9 $vgpr6, 1280, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr14, implicit-def $vgpr15, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr14 = DS_READ_B32_gfx9 $vgpr7, 2132, 0, implicit $exec
+ $vgpr15 = DS_READ_B32_gfx9 $vgpr6, 1344, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr16, implicit-def $vgpr17, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr16 = DS_READ_B32_gfx9 $vgpr7, 2136, 0, implicit $exec
+ $vgpr17 = DS_READ_B32_gfx9 $vgpr6, 1408, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr18, implicit-def $vgpr19, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr18 = DS_READ_B32_gfx9 $vgpr7, 2140, 0, implicit $exec
+ $vgpr19 = DS_READ_B32_gfx9 $vgpr6, 1472, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr20, implicit-def $vgpr21, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr20 = DS_READ_B32_gfx9 $vgpr7, 2144, 0, implicit $exec
+ $vgpr21 = DS_READ_B32_gfx9 $vgpr6, 1536, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr22, implicit-def $vgpr23, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr22 = DS_READ_B32_gfx9 $vgpr7, 2148, 0, implicit $exec
+ $vgpr23 = DS_READ_B32_gfx9 $vgpr6, 1600, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr24, implicit-def $vgpr25, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr24 = DS_READ_B32_gfx9 $vgpr7, 2152, 0, implicit $exec
+ $vgpr25 = DS_READ_B32_gfx9 $vgpr6, 1664, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr26, implicit-def $vgpr27, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr26 = DS_READ_B32_gfx9 $vgpr7, 2156, 0, implicit $exec
+ $vgpr27 = DS_READ_B32_gfx9 $vgpr6, 1728, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr28, implicit-def $vgpr29, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr28 = DS_READ_B32_gfx9 $vgpr7, 2160, 0, implicit $exec
+ $vgpr29 = DS_READ_B32_gfx9 $vgpr6, 1792, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr30, implicit-def $vgpr31, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr30 = DS_READ_B32_gfx9 $vgpr7, 2164, 0, implicit $exec
+ $vgpr31 = DS_READ_B32_gfx9 $vgpr6, 1856, 0, implicit $exec
+ }
+ ATOMIC_FENCE 6, 3
+ BUNDLE implicit-def $vgpr32, implicit-def $vgpr33, implicit $vgpr7, implicit $exec, implicit $vgpr6 {
+ $vgpr32 = DS_READ_B32_gfx9 $vgpr7, 2168, 0, implicit $exec
+ $vgpr33 = DS_READ_B32_gfx9 $vgpr6, 1920, 0, implicit $exec
+ }
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr34, killed $vgpr35, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr36, killed $vgpr37, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr38, killed $vgpr39, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr40, killed $vgpr41, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr42, killed $vgpr43, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr44, killed $vgpr45, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr46, killed $vgpr47, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr10, killed $vgpr11, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr12, killed $vgpr13, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr14, killed $vgpr15, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr16, killed $vgpr17, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr18, killed $vgpr19, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr20, killed $vgpr21, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr22, killed $vgpr23, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr24, killed $vgpr25, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr26, killed $vgpr27, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr28, killed $vgpr29, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr30, killed $vgpr31, killed $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = nofpexcept V_FMAC_F32_e32 killed $vgpr32, killed $vgpr33, killed $vgpr1, implicit $mode, implicit $exec
+...
More information about the llvm-commits
mailing list