[PATCH] MISched: Fix moving stores across barriers
Tom Stellard
tom at stellard.net
Mon Dec 8 15:41:01 PST 2014
r223717.
On Wed, Dec 03, 2014 at 11:08:32PM -0800, Andrew Trick wrote:
> Thanks Tom. Looks like a “goto” bug. buildSchedGraph needs to be rewritten but I never found the occasion to do it.
>
> Hal already pointed out the minor issues with your patch that I noticed, so LGTM.
>
> -Andy
>
> > On Dec 2, 2014, at 12:32 PM, Tom Stellard <thomas.stellard at amd.com> wrote:
> >
> > This fixes an issue with ScheduleDAGInstrs::buildSchedGraph
> > where stores without an underlying object would not be added
> > as a predecessor to the current BarrierChain.
> > ---
> > lib/CodeGen/ScheduleDAGInstrs.cpp | 9 +++++--
> > test/CodeGen/R600/store-barrier.ll | 52 ++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 59 insertions(+), 2 deletions(-)
> > create mode 100644 test/CodeGen/R600/store-barrier.ll
> >
> > diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
> > index d8d8422..ee8b5c2 100644
> > --- a/lib/CodeGen/ScheduleDAGInstrs.cpp
> > +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
> > @@ -794,6 +794,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
> > for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
> > MII != MIE; --MII) {
> > MachineInstr *MI = std::prev(MII);
> > +
> > if (MI && DbgMI) {
> > DbgValues.push_back(std::make_pair(DbgMI, MI));
> > DbgMI = nullptr;
> > @@ -920,6 +921,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
> > AliasMemDefs.clear();
> > AliasMemUses.clear();
> > } else if (MI->mayStore()) {
> > + // Add dependence on barrier chain, if needed.
> > + // There is no point to check aliasing on barrier event. Even if
> > + // SU and barrier _could_ be reordered, they should not. In addition,
> > + // we have lost all RejectMemNodes below barrier.
> > + if (BarrierChain)
> > + BarrierChain->addPred(SDep(SU, SDep::Barrier));
> > UnderlyingObjectsVector Objs;
> > getUnderlyingObjectsForInstr(MI, MFI, Objs);
> >
> > @@ -993,8 +1000,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
> > // There is no point to check aliasing on barrier event. Even if
> > // SU and barrier _could_ be reordered, they should not. In addition,
> > // we have lost all RejectMemNodes below barrier.
> > - if (BarrierChain)
> > - BarrierChain->addPred(SDep(SU, SDep::Barrier));
> > } else if (MI->mayLoad()) {
> > bool MayAlias = true;
> > if (MI->isInvariantLoad(AA)) {
> > diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/R600/store-barrier.ll
> > new file mode 100644
> > index 0000000..229cd8f
> > --- /dev/null
> > +++ b/test/CodeGen/R600/store-barrier.ll
> > @@ -0,0 +1,52 @@
> > +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
> > +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
> > +
> > +; This test is for a bug in the machine scheduler where stores without
> > +; an underlying object would be moved across the barrier. In this
> > +; test, the <2 x i8> store will be split into two i8 stores, so they
> > +; won't have an underlying object.
> > +
> > +; CHECK-LABEL: {{^}}test:
> > +; CHECK: ds_write_b8
> > +; CHECK: ds_write_b8
> > +; CHECK: s_barrier
> > +; Function Attrs: nounwind
> > +define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
> > +bb:
> > + %tmp10 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp9
> > + %tmp13 = load i32 addrspace(1)* %tmp10, align 2
> > + %tmp14 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp13
> > + %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
> > + %tmp16 = add i32 %tmp13, 1
> > + %tmp17 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp16
> > + store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
> > + tail call void @llvm.AMDGPU.barrier.local() #2
> > + %tmp25 = load i32 addrspace(1)* %tmp10, align 4
> > + %tmp26 = sext i32 %tmp25 to i64
> > + %tmp27 = sext i32 %arg4 to i64
> > + %tmp28 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
> > + %tmp29 = load i8 addrspace(3)* %tmp28, align 1
> > + %tmp30 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
> > + store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
> > + %tmp32 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
> > + %tmp33 = load i8 addrspace(3)* %tmp32, align 1
> > + %tmp35 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
> > + store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
> > + ret void
> > +}
> > +
> > +; Function Attrs: noduplicate nounwind
> > +declare void @llvm.AMDGPU.barrier.local() #2
> > +
> > +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
> > +attributes #1 = { nounwind readnone }
> > +attributes #2 = { noduplicate nounwind }
> > +
> > +!opencl.kernels = !{!0}
> > +
> > +!0 = metadata !{void (<2 x i8> addrspace(3)*, <2 x i8> addrspace(1)*, i32 addrspace(1)*, <2 x i8> addrspace(1)*, i32, i64)* @test}
> > +!3 = metadata !{metadata !4, metadata !4, i64 0}
> > +!4 = metadata !{metadata !"int", metadata !5, i64 0}
> > +!5 = metadata !{metadata !"omnipotent char", metadata !6, i64 0}
> > +!6 = metadata !{metadata !"Simple C/C++ TBAA"}
> > +!7 = metadata !{metadata !5, metadata !5, i64 0}
> > --
> > 2.0.4
> >
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list