[llvm] e3ffe72 - [AMDGPU] Cluster shader exports
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Thu May 7 03:06:51 PDT 2020
Author: Carl Ritson
Date: 2020-05-07T19:05:38+09:00
New Revision: e3ffe7269b6992a23a76b3148cb930c5b62ded88
URL: https://github.com/llvm/llvm-project/commit/e3ffe7269b6992a23a76b3148cb930c5b62ded88
DIFF: https://github.com/llvm/llvm-project/commit/e3ffe7269b6992a23a76b3148cb930c5b62ded88.diff
LOG: [AMDGPU] Cluster shader exports
Summary:
Add DAG scheduling mutation to cluster export instructions.
This avoids unnecessary waitcnts being added when computation
ends up interspersed with exports.
Reviewers: foad, arsenm, rampitec, nhaehnle
Reviewed By: foad
Subscribers: kzhuravl, jvesely, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79481
Added:
llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
new file mode 100644
index 000000000000..42ff12ddda2b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -0,0 +1,92 @@
+//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains a DAG scheduling mutation to cluster shader
+/// exports.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUExportClustering.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+// Scheduling DAG mutation whose apply() clusters shader export instructions.
+class ExportClustering : public ScheduleDAGMutation {
+public:
+  void apply(ScheduleDAGInstrs *DAG) override;
+};
+
+// True when SU's instruction has opcode AMDGPU::EXP or AMDGPU::EXP_DONE.
+static bool isExport(const SUnit &SU) {
+  const unsigned Opcode = SU.getInstr()->getOpcode();
+  return Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE;
+}
+
+// Chain consecutive exports with cluster edges. For every edge that is added,
+// the later export's non-weak, non-export predecessors also become artificial
+// predecessors of the first export, keeping computation out of the chain.
+static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
+  SUnit *const First = Exports[0];
+  for (unsigned Idx = 1, Count = Exports.size(); Idx < Count; ++Idx) {
+    SUnit *const Earlier = Exports[Idx - 1];
+    SUnit *const Later = Exports[Idx];
+    if (!DAG->addEdge(Later, SDep(Earlier, SDep::Cluster)))
+      continue; // addEdge returned false; leave this pair untouched.
+    for (const SDep &Dep : Later->Preds) {
+      SUnit *const Producer = Dep.getSUnit();
+      if (!Dep.isWeak() && !isExport(*Producer))
+        DAG->addEdge(First, SDep(Producer, SDep::Artificial));
+    }
+  }
+}
+
+// Collects the export SUnits of the DAG into chains and clusters each chain.
+// An export joins the chain of its first non-artificial export predecessor;
+// an export with no such predecessor starts a new chain.
+void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
+  SmallVector<SmallVector<SUnit *, 8>, 4> Chains;
+  DenseMap<unsigned, unsigned> NodeToChain; // NodeNum -> index into Chains
+
+  for (SUnit &Node : DAG->SUnits) {
+    if (!isExport(Node))
+      continue;
+
+    // Assume a fresh chain until an export predecessor says otherwise.
+    const unsigned FreshIdx = Chains.size();
+    unsigned ChainIdx = FreshIdx;
+    for (const SDep &Dep : Node.Preds) {
+      const SUnit *Source = Dep.getSUnit();
+      if (!isExport(*Source) || Dep.isArtificial())
+        continue;
+      ChainIdx = NodeToChain.lookup(Source->NodeNum);
+      break;
+    }
+    NodeToChain[Node.NodeNum] = ChainIdx;
+    if (ChainIdx == FreshIdx)
+      Chains.emplace_back();
+    Chains[ChainIdx].push_back(&Node);
+  }
+
+  for (auto &Members : Chains)
+    buildCluster(Members, DAG);
+}
+
+} // end namespace
+
+// Factory for the export clustering mutation; declared in
+// AMDGPUExportClustering.h. Defined with an explicit llvm:: qualifier so a
+// mismatch with the declaration is diagnosed at compile time.
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createAMDGPUExportClusteringDAGMutation() {
+  return std::make_unique<ExportClustering>();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
new file mode 100644
index 000000000000..58491d0671e4
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
@@ -0,0 +1,15 @@
+//===- AMDGPUExportClustering.h - AMDGPU Export Clustering ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUEXPORTCLUSTERING_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUEXPORTCLUSTERING_H
+#include "llvm/CodeGen/MachineScheduler.h"
+namespace llvm {
+/// Creates the DAG scheduling mutation that clusters shader exports.
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation();
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUEXPORTCLUSTERING_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 3db52afff861..b5a2abcd7a82 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
+#include "AMDGPUExportClustering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUMacroFusion.h"
@@ -283,6 +284,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
+ DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
}
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index d9c900bb7446..c273ea89bd91 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -42,6 +42,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAtomicOptimizer.cpp
AMDGPUCallLowering.cpp
AMDGPUCodeGenPrepare.cpp
+ AMDGPUExportClustering.cpp
AMDGPUFixFunctionBitcasts.cpp
AMDGPUFrameLowering.cpp
AMDGPUHSAMetadataStreamer.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 1bbd209c21af..d6d80246a89b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -542,14 +542,13 @@ end:
; GCN-LABEL: {{^}}test_export_clustering:
; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
; GCN-DAG: v_add_f32_e32 [[Z0:v[0-9]+]]
-; GCN-DAG: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
; GCN-DAG: v_sub_f32_e32 [[Z1:v[0-9]+]]
-; GCN: s_waitcnt expcnt(0)
-; GCN: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
-; GCN: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
+; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
+; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
More information about the llvm-commits mailing list