[llvm] r223717 - MISched: Fix moving stores across barriers
Tom Stellard
thomas.stellard at amd.com
Mon Dec 8 15:36:48 PST 2014
Author: tstellar
Date: Mon Dec 8 17:36:48 2014
New Revision: 223717
URL: http://llvm.org/viewvc/llvm-project?rev=223717&view=rev
Log:
MISched: Fix moving stores across barriers
This fixes an issue with ScheduleDAGInstrs::buildSchedGraph
where stores without an underlying object would not be added
as a predecessor to the current BarrierChain.
Added:
llvm/trunk/test/CodeGen/R600/store-barrier.ll
Modified:
llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=223717&r1=223716&r2=223717&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Mon Dec 8 17:36:48 2014
@@ -920,6 +920,13 @@ void ScheduleDAGInstrs::buildSchedGraph(
AliasMemDefs.clear();
AliasMemUses.clear();
} else if (MI->mayStore()) {
+ // Add dependence on barrier chain, if needed.
+ // There is no point to check aliasing on barrier event. Even if
+ // SU and barrier _could_ be reordered, they should not. In addition,
+ // we have lost all RejectMemNodes below barrier.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
+
UnderlyingObjectsVector Objs;
getUnderlyingObjectsForInstr(MI, MFI, Objs);
@@ -989,12 +996,6 @@ void ScheduleDAGInstrs::buildSchedGraph(
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
}
- // Add dependence on barrier chain, if needed.
- // There is no point to check aliasing on barrier event. Even if
- // SU and barrier _could_ be reordered, they should not. In addition,
- // we have lost all RejectMemNodes below barrier.
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
Added: llvm/trunk/test/CodeGen/R600/store-barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store-barrier.ll?rev=223717&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store-barrier.ll (added)
+++ llvm/trunk/test/CodeGen/R600/store-barrier.ll Mon Dec 8 17:36:48 2014
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
+
+; This test is for a bug in the machine scheduler where stores without
+; an underlying object would be moved across the barrier. In this
+; test, the <2 x i8> store will be split into two i8 stores, so they
+; won't have an underlying object.
+
+; CHECK-LABEL: {{^}}test:
+; CHECK: ds_write_b8
+; CHECK: ds_write_b8
+; CHECK: s_barrier
+; CHECK: s_endpgm
+; Function Attrs: nounwind
+define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
+bb:
+ %tmp10 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp9
+ %tmp13 = load i32 addrspace(1)* %tmp10, align 2
+ %tmp14 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp13
+ %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+ %tmp16 = add i32 %tmp13, 1
+ %tmp17 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp16
+ store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
+ tail call void @llvm.AMDGPU.barrier.local() #2
+ %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+ %tmp26 = sext i32 %tmp25 to i64
+ %tmp27 = sext i32 %arg4 to i64
+ %tmp28 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
+ %tmp29 = load i8 addrspace(3)* %tmp28, align 1
+ %tmp30 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
+ store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
+ %tmp32 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
+ %tmp33 = load i8 addrspace(3)* %tmp32, align 1
+ %tmp35 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
+ store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
+ ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #2 = { noduplicate nounwind }
More information about the llvm-commits mailing list