[llvm] 195de44 - [AMDGPU] Strengthen export cluster ordering
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Wed May 13 07:08:05 PDT 2020
Author: Carl Ritson
Date: 2020-05-13T23:07:37+09:00
New Revision: 195de442daba5ae1754528eb2869ca50c3af2669
URL: https://github.com/llvm/llvm-project/commit/195de442daba5ae1754528eb2869ca50c3af2669
DIFF: https://github.com/llvm/llvm-project/commit/195de442daba5ae1754528eb2869ca50c3af2669.diff
LOG: [AMDGPU] Strengthen export cluster ordering
Summary:
When removing barrier edges on exports, the barrier dependencies of the
export predecessor need to be propagated to maintain ordering.
Reviewers: foad
Reviewed By: foad
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79855
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
index cbc248fbd9c8..25c82ed61fc2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -81,6 +81,32 @@ static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
}
}
+static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
+ SmallVector<SDep, 2> ToAdd, ToRemove;
+
+ for (const SDep &Pred : SU.Preds) {
+ SUnit *PredSU = Pred.getSUnit();
+ if (Pred.isBarrier() && isExport(*PredSU)) {
+ ToRemove.push_back(Pred);
+ if (isExport(SU))
+ continue;
+
+ // If we remove a barrier we need to copy dependencies
+ // from the predecessor to maintain order.
+ for (const SDep &ExportPred : PredSU->Preds) {
+ SUnit *ExportPredSU = ExportPred.getSUnit();
+ if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
+ ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
+ }
+ }
+ }
+
+ for (SDep Pred : ToRemove)
+ SU.removePred(Pred);
+ for (SDep Pred : ToAdd)
+ DAG->addEdge(&SU, Pred);
+}
+
void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
@@ -92,20 +118,18 @@ void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
// on exports. Edges will be added later to order the exports.
unsigned PosCount = 0;
for (SUnit &SU : DAG->SUnits) {
- if (isExport(SU)) {
- Chain.push_back(&SU);
- if (isPositionExport(TII, &SU))
- PosCount++;
- }
+ if (!isExport(SU))
+ continue;
- SmallVector<SDep, 2> ToRemove;
- for (const SDep &Pred : SU.Preds) {
- SUnit *PredSU = Pred.getSUnit();
- if (Pred.isBarrier() && isExport(*PredSU))
- ToRemove.push_back(Pred);
- }
- for (SDep Pred : ToRemove)
- SU.removePred(Pred);
+ Chain.push_back(&SU);
+ if (isPositionExport(TII, &SU))
+ PosCount++;
+
+ removeExportDependencies(DAG, SU);
+
+ SmallVector<SDep, 4> Succs(SU.Succs);
+ for (SDep Succ : Succs)
+ removeExportDependencies(DAG, *Succ.getSUnit());
}
// Apply clustering if there are multiple exports
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index a9e1f1859a2e..9a62ca5db089 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -602,6 +602,27 @@ define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0
ret void
}
+; GCN-LABEL: {{^}}test_export_across_store_load:
+; GCN: buffer_store
+; GCN: buffer_load
+; GCN: exp pos0
+; GCN: exp param0
+; GCN: exp param1
+define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
+ %cmp = icmp eq i32 %idx, 1
+ %data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
+ %sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
+ store float %v, float addrspace(5)* %sptr, align 8
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
+ %ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
+ %load0 = load float, float addrspace(5)* %ptr0, align 8
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind inaccessiblememonly }
attributes #2 = { nounwind readnone }
More information about the llvm-commits
mailing list