[llvm] r217371 - [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
Chad Rosier
mcrosier at codeaurora.org
Wed Sep 10 10:20:13 PDT 2014
Sanjin,
Matt has recanted his comment, so feel free to ignore the previous email.
Chad
>
> On Sep 9, 2014, at 6:03 PM, Matt Arsenault <arsenm2 at gmail.com> wrote:
>
>>
>> On Sep 8, 2014, at 10:43 AM, Chad Rosier <mcrosier at codeaurora.org>
>> wrote:
>>
>>> Author: mcrosier
>>> Date: Mon Sep 8 09:43:48 2014
>>> New Revision: 217371
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=217371&view=rev
>>> Log:
>>> [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling
>>> graph.
>>>
>>> Patch by Sanjin Sijaric <ssijaric at codeaurora.org>!
>>> Phabricator Review: http://reviews.llvm.org/D5103
>>>
>>> Added:
>>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>>> Modified:
>>> llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>>> llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>>> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>>> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>>>
>>> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
>>> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Mon Sep 8 09:43:48 2014
>>> @@ -1192,6 +1192,20 @@ public:
>>> return nullptr;
>>> }
>>>
>>> +  // areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the
>>> +  // target to tell, even without aliasing information, that two MIs
>>> +  // access different memory addresses. This function returns true if two
>>> +  // MIs access different memory addresses, and false otherwise.
>>> +  virtual bool
>>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>>> +                                  AliasAnalysis *AA = nullptr) const {
>>> +    assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>>> +           "MIa must load from or modify a memory location");
>>> +    assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>>> +           "MIb must load from or modify a memory location");
>>> +    return false;
>>> +  }
>>> +
>>> private:
>>> int CallFrameSetupOpcode, CallFrameDestroyOpcode;
>>> };
>>>
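[For readers new to the hook, here is a minimal sketch of the shape a target override takes. This is an illustration only, not part of the commit; the MyTargetInstrInfo class is hypothetical, and the real AArch64 override appears further down.]

  #include "llvm/Target/TargetInstrInfo.h"
  #include <cassert>

  using namespace llvm;

  // Hypothetical target, for illustration only.
  class MyTargetInstrInfo : public TargetInstrInfo {
  public:
    bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
                                         AliasAnalysis *AA = nullptr)
        const override {
      assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
             "MIa must load from or modify a memory location");
      assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
             "MIb must load from or modify a memory location");
      // Stay conservative around side effects and ordered (volatile/atomic)
      // references; returning false keeps the dependence edge.
      if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
          MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
        return false;
      // A real target would compare base register, offset, and access width
      // here; see the AArch64 implementation below.
      return false;
    }
  };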
>>> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
>>> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Mon Sep 8 09:43:48 2014
>>> @@ -511,9 +511,18 @@ static inline bool isUnsafeMemoryObject(
>>>  static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
>>>                               MachineInstr *MIa, MachineInstr *MIb) {
>>> +  const MachineFunction *MF = MIa->getParent()->getParent();
>>> +  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
>>> +
>>>    // Cover a trivial case - no edge is needed to itself.
>>>    if (MIa == MIb)
>>>      return false;
>>> +
>>> +  // Let the target decide if memory accesses cannot possibly overlap.
>>> +  if ((MIa->mayLoad() || MIa->mayStore()) &&
>>> +      (MIb->mayLoad() || MIb->mayStore()))
>>> +    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
>>> +      return false;
>>>
>>>    // FIXME: Need to handle multiple memory operands to support all targets.
>>>    if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
>>>
>>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
>>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Sep 8 09:43:48 2014
>>> @@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtI
>>>    }
>>>  }
>>>
>>> +bool
>>> +AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
>>> +                                                  MachineInstr *MIb,
>>> +                                                  AliasAnalysis *AA) const {
>>> +  const TargetRegisterInfo *TRI = &getRegisterInfo();
>>> +  unsigned BaseRegA = 0, BaseRegB = 0;
>>> +  int OffsetA = 0, OffsetB = 0;
>>> +  int WidthA = 0, WidthB = 0;
>>> +
>>> +  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>>> +         "MIa must be a store or a load");
>>> +  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>>> +         "MIb must be a store or a load");
>>> +
>>> +  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
>>> +      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
>>> +    return false;
>>> +
>>> +  // Retrieve the base register, the offset from the base register, and the
>>> +  // width, i.e. the size of the memory being loaded/stored (e.g. 1, 2, 4,
>>> +  // 8). If the base registers are identical and the lower access's offset
>>> +  // plus its width does not reach past the higher access's offset, then
>>> +  // the two accesses touch disjoint memory.
>>> +  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
>>> +      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
>>> +    if (BaseRegA == BaseRegB) {
>>> +      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
>>> +      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
>>> +      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
>>> +      if (LowOffset + LowWidth <= HighOffset)
>>> +        return true;
>>> +    }
>>> +  }
>>> +  return false;
>>> +}
>>> +
>>> /// analyzeCompare - For a comparison instruction, return the source
>>> /// registers in SrcReg and SrcReg2, and the value it compares against
>>> /// in CmpValue. Return true if the comparison instruction can be analyzed.
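[The overlap test in areMemAccessesTriviallyDisjoint above is a pure interval check. A standalone restatement, for illustration only and not code from the commit, that can be compiled and tested on its own:]

  #include <cassert>

  // Two accesses [OffsetA, OffsetA + WidthA) and [OffsetB, OffsetB + WidthB)
  // off the same base register are disjoint exactly when the lower interval
  // ends at or before the higher one begins.
  static bool offsetsAreDisjoint(int OffsetA, int WidthA,
                                 int OffsetB, int WidthB) {
    int LowOffset  = OffsetA < OffsetB ? OffsetA : OffsetB;
    int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
    int LowWidth   = (LowOffset == OffsetA) ? WidthA : WidthB;
    return LowOffset + LowWidth <= HighOffset;
  }

  int main() {
    assert(offsetsAreDisjoint(4, 4, 8, 4));  // ldr [x0, #4] vs. str [x0, #8]
    assert(!offsetsAreDisjoint(4, 8, 8, 4)); // 8-byte access at #4 reaches #8
    return 0;
  }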
>>> @@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(M
>>> };
>>> }
>>>
>>
>>
>> Why not add the width argument to the standard getLdStBaseRegImmOfs?
>> Other targets that will want to implement
>> areMemAccessesTriviallyDisjoint are going to want the same information.
>> The logic of figuring out overlapping offsets is also likely going to be
>> the same, so maybe that part should be pulled into the user of this.
>
> Actually, I don't think the width argument is necessary. I'll be
> posting a patch later today that implements this for R600, and I just used
> the MachineMemOperand to get the size, since it was easier than parsing it
> out of the instruction.
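[Matt's alternative, sketched in code. This is an illustration only, not from any posted patch; it assumes the instruction carries exactly one memory operand, mirroring the hasOneMemOperand restriction in ScheduleDAGInstrs.cpp above.]

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineMemOperand.h"
  #include <cstdint>

  using namespace llvm;

  // Read the access size off the MachineMemOperand instead of decoding the
  // opcode. Returns false when there is not exactly one memory operand.
  static bool getAccessSize(const MachineInstr *MI, uint64_t &Size) {
    if (!MI->hasOneMemOperand())
      return false;
    Size = (*MI->memoperands_begin())->getSize(); // Size in bytes.
    return true;
  }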
>
>
>>
>>
>>> +bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
>>> +    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
>>> +    const TargetRegisterInfo *TRI) const {
>>> +  // Handle only loads/stores with base register followed by immediate offset.
>>> +  if (LdSt->getNumOperands() != 3)
>>> +    return false;
>>> +  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
>>> +    return false;
>>> +
>>> +  // Offset is calculated as the immediate operand multiplied by the scaling
>>> +  // factor. Unscaled instructions have a scaling factor of 1.
>>> +  int Scale = 0;
>>> + switch (LdSt->getOpcode()) {
>>> + default:
>>> + return false;
>>> + case AArch64::LDURQi:
>>> + case AArch64::STURQi:
>>> + Width = 16;
>>> + Scale = 1;
>>> + break;
>>> + case AArch64::LDURXi:
>>> + case AArch64::LDURDi:
>>> + case AArch64::STURXi:
>>> + case AArch64::STURDi:
>>> + Width = 8;
>>> + Scale = 1;
>>> + break;
>>> + case AArch64::LDURWi:
>>> + case AArch64::LDURSi:
>>> + case AArch64::LDURSWi:
>>> + case AArch64::STURWi:
>>> + case AArch64::STURSi:
>>> + Width = 4;
>>> + Scale = 1;
>>> + break;
>>> + case AArch64::LDURHi:
>>> + case AArch64::LDURHHi:
>>> + case AArch64::LDURSHXi:
>>> + case AArch64::LDURSHWi:
>>> + case AArch64::STURHi:
>>> + case AArch64::STURHHi:
>>> + Width = 2;
>>> + Scale = 1;
>>> + break;
>>> + case AArch64::LDURBi:
>>> + case AArch64::LDURBBi:
>>> + case AArch64::LDURSBXi:
>>> + case AArch64::LDURSBWi:
>>> + case AArch64::STURBi:
>>> + case AArch64::STURBBi:
>>> + Width = 1;
>>> + Scale = 1;
>>> + break;
>>> + case AArch64::LDRXui:
>>> + case AArch64::STRXui:
>>> + Scale = Width = 8;
>>> + break;
>>> + case AArch64::LDRWui:
>>> + case AArch64::STRWui:
>>> + Scale = Width = 4;
>>> + break;
>>> + case AArch64::LDRBui:
>>> + case AArch64::STRBui:
>>> + Scale = Width = 1;
>>> + break;
>>> + case AArch64::LDRHui:
>>> + case AArch64::STRHui:
>>> + Scale = Width = 2;
>>> + break;
>>> + case AArch64::LDRSui:
>>> + case AArch64::STRSui:
>>> + Scale = Width = 4;
>>> + break;
>>> + case AArch64::LDRDui:
>>> + case AArch64::STRDui:
>>> + Scale = Width = 8;
>>> + break;
>>> + case AArch64::LDRQui:
>>> + case AArch64::STRQui:
>>> + Scale = Width = 16;
>>> + break;
>>> + case AArch64::LDRBBui:
>>> + case AArch64::STRBBui:
>>> + Scale = Width = 1;
>>> + break;
>>> + case AArch64::LDRHHui:
>>> + case AArch64::STRHHui:
>>> + Scale = Width = 2;
>>> + break;
>>> + };
>>> +
>>> + BaseReg = LdSt->getOperand(1).getReg();
>>> + Offset = LdSt->getOperand(2).getImm() * Scale;
>>> + return true;
>>> +}
>>> +
>>> /// Detect opportunities for ldp/stp formation.
>>> ///
>>> /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
>>>
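[To make the scaled/unscaled distinction in the switch above concrete, a small standalone illustration. The values follow the AArch64 encodings handled above; the snippet itself is not part of the commit.]

  #include <cassert>

  // Scaled form (e.g. LDRXui): the encoded immediate counts access-width
  // units, so the byte offset is imm * width. Unscaled form (e.g. LDURXi):
  // the immediate is already a byte offset, i.e. scale 1.
  int main() {
    int Imm = 3;
    int ScaledOffset   = Imm * 8; // LDRXui x0, [x1, #3] -> byte offset 24
    int UnscaledOffset = Imm * 1; // LDURXi x0, [x1, #3] -> byte offset 3
    assert(ScaledOffset == 24 && UnscaledOffset == 3);
    return 0;
  }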
>>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
>>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Mon Sep 8 09:43:48 2014
>>> @@ -52,6 +52,10 @@ public:
>>>    bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
>>>                               unsigned &DstReg, unsigned &SubIdx) const override;
>>>
>>> +  bool
>>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>>> +                                  AliasAnalysis *AA = nullptr) const override;
>>> +
>>>    unsigned isLoadFromStackSlot(const MachineInstr *MI,
>>>                                 int &FrameIndex) const override;
>>>    unsigned isStoreToStackSlot(const MachineInstr *MI,
>>> @@ -90,6 +94,10 @@ public:
>>>                               unsigned &Offset,
>>>                               const TargetRegisterInfo *TRI) const override;
>>>
>>> +  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
>>> +                                 int &Offset, int &Width,
>>> +                                 const TargetRegisterInfo *TRI) const;
>>> +
>>>    bool enableClusterLoads() const override { return true; }
>>>
>>>    bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
>>>
>>> Added:
>>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll?rev=217371&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll (added)
>>> +++ llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll Mon Sep 8 09:43:48 2014
>>> @@ -0,0 +1,31 @@
>>> +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
>>> +; Check that the scheduler moves the load from a[1] past the store into a[2].
>>> +@a = common global i32* null, align 8
>>> +@m = common global i32 0, align 4
>>> +
>>> +; Function Attrs: nounwind
>>> +define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
>>> +entry:
>>> +; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
>>> +; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
>>> + %0 = load i32** @a, align 8, !tbaa !1
>>> + %arrayidx = getelementptr inbounds i32* %0, i64 2
>>> + store i32 %i, i32* %arrayidx, align 4, !tbaa !5
>>> + %arrayidx1 = getelementptr inbounds i32* %0, i64 1
>>> + %1 = load i32* %arrayidx1, align 4, !tbaa !5
>>> + %add = add nsw i32 %k, %i
>>> + store i32 %add, i32* @m, align 4, !tbaa !5
>>> + ret i32 %1
>>> +}
>>> +
>>> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
>>> +
>>> +!llvm.ident = !{!0}
>>> +
>>> +!0 = metadata !{metadata !"clang version 3.6.0 "}
>>> +!1 = metadata !{metadata !2, metadata !2, i64 0}
>>> +!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
>>> +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
>>> +!4 = metadata !{metadata !"Simple C/C++ TBAA"}
>>> +!5 = metadata !{metadata !6, metadata !6, i64 0}
>>> +!6 = metadata !{metadata !"int", metadata !3, i64 0}
>>>
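[For reference, the IR in the test corresponds roughly to source along these lines. This is a reconstruction from the IR, not taken from the commit; the variable names match the test's globals.]

  // Approximate source for the test above.
  int *a;
  int m;

  int func(int i, int j, int k) {
    a[2] = i;     // str [x REG, #8]
    int t = a[1]; // ldr [x REG, #4] -- may be scheduled before the store
    m = k + i;
    return t;
  }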
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>