[llvm] r287021 - AMDGPU: Enable store clustering
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 12:22:55 PST 2016
Author: arsenm
Date: Tue Nov 15 14:22:55 2016
New Revision: 287021
URL: http://llvm.org/viewvc/llvm-project?rev=287021&view=rev
Log:
AMDGPU: Enable store clustering
Also respect the TII hook for these like the generic code does
in case we want a flag later to disable this.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp?rev=287021&r1=287020&r2=287021&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Tue Nov 15 14:22:55 2016
@@ -36,6 +36,10 @@ bool AMDGPUInstrInfo::enableClusterLoads
return true;
}
+bool AMDGPUInstrInfo::enableClusterStores() const {
+ return true;
+}
+
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into 2 16 store batches.
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h?rev=287021&r1=287020&r2=287021&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h Tue Nov 15 14:22:55 2016
@@ -40,6 +40,7 @@ public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
bool enableClusterLoads() const override;
+ bool enableClusterStores() const override;
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=287021&r1=287020&r2=287021&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Nov 15 14:22:55 2016
@@ -102,7 +102,14 @@ static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
- DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
+ if (TII->enableClusterLoads())
+ DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI));
+
+ if (TII->enableClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI));
+
return DAG;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=287021&r1=287020&r2=287021&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Tue Nov 15 14:22:55 2016
@@ -207,13 +207,21 @@ define void @reorder_global_offsets(i32
}
; FUNC-LABEL: {{^}}reorder_global_offsets_addr64_soffset0:
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:20{{$}}
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:12{{$}}
-; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:28{{$}}
-; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:44{{$}}
+; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:28{{$}}
+; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:44{{$}}
+
+; GCN: v_mov_b32
+; GCN: v_mov_b32
+
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64{{$}}
+; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:20{{$}}
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+
; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:36{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:52{{$}}
+; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:52{{$}}
define void @reorder_global_offsets_addr64_soffset0(i32 addrspace(1)* noalias nocapture %ptr.base) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
More information about the llvm-commits
mailing list