[llvm] r217371 - [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
Chad Rosier
mcrosier at codeaurora.org
Mon Sep 29 06:47:12 PDT 2014
+Sanjin
Adding Sanjin since this is his work.
Chad
> ----- Original Message -----
>> From: "Chad Rosier" <mcrosier at codeaurora.org>
>> To: llvm-commits at cs.uiuc.edu
>> Sent: Monday, September 8, 2014 9:43:48 AM
>> Subject: [llvm] r217371 - [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
>>
>> Author: mcrosier
>> Date: Mon Sep 8 09:43:48 2014
>> New Revision: 217371
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=217371&view=rev
>> Log:
>> [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
>>
>> Patch by Sanjin Sijaric <ssijaric at codeaurora.org>!
>> Phabricator Review: http://reviews.llvm.org/D5103
>>
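[For readers skimming the patch: the test case added below corresponds roughly to the following C++ source (illustrative only; the function and variable names are invented). Two accesses at distinct constant offsets from the same base pointer occupy disjoint byte ranges, so the scheduler is free to reorder them.

    // Hypothetical source for the pattern the patch targets: the load of
    // a[1] touches bytes [a+4, a+8) and the store to a[2] touches bytes
    // [a+8, a+12), so the load may be scheduled above the store.
    int reorder(int *a, int i) {
      a[2] = i;       // becomes: str w?, [x?, #8]
      return a[1];    // becomes: ldr w?, [x?, #4]
    }
]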
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>> Modified:
>> llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>> llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>>
>> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
>> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Mon Sep 8 09:43:48 2014
>> @@ -1192,6 +1192,20 @@ public:
>>      return nullptr;
>>    }
>>
>> +  // areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the
>> +  // target to tell, even without aliasing information, that two MIs
>> +  // access different memory addresses. This function returns true if two
>> +  // MIs access different memory addresses, and false otherwise.
>> +  virtual bool
>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>> +                                  AliasAnalysis *AA = nullptr) const {
>> +    assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>> +           "MIa must load from or modify a memory location");
>> +    assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>> +           "MIb must load from or modify a memory location");
>> +    return false;
>> +  }
>> +
>>  private:
>>    int CallFrameSetupOpcode, CallFrameDestroyOpcode;
>>  };
>>
>> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Mon Sep 8 09:43:48 2014
>> @@ -511,9 +511,18 @@ static inline bool isUnsafeMemoryObject(
>>  static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
>>                               MachineInstr *MIa, MachineInstr *MIb) {
>> +  const MachineFunction *MF = MIa->getParent()->getParent();
>> +  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
>> +
>>    // Cover a trivial case - no edge is needed to itself.
>>    if (MIa == MIb)
>>      return false;
>> +
>> +  // Let the target decide if memory accesses cannot possibly overlap.
>> +  if ((MIa->mayLoad() || MIa->mayStore()) &&
>> +      (MIb->mayLoad() || MIb->mayStore()))
>> +    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
>> +      return false;
>>
>>    // FIXME: Need to handle multiple memory operands to support all targets.
>>    if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
>>
>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Sep 8 09:43:48 2014
>> @@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtI
>>    }
>>  }
>>
>> +bool
>> +AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
>> +                                                  MachineInstr *MIb,
>> +                                                  AliasAnalysis *AA) const {
>> +  const TargetRegisterInfo *TRI = &getRegisterInfo();
>> +  unsigned BaseRegA = 0, BaseRegB = 0;
>> +  int OffsetA = 0, OffsetB = 0;
>> +  int WidthA = 0, WidthB = 0;
>> +
>> +  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>> +         "MIa must be a store or a load");
>> +  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>> +         "MIb must be a store or a load");
>> +
>> +  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
>> +      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
>> +    return false;
>> +
>> +  // Retrieve the base register, the offset from the base register, and the
>> +  // width, i.e. the number of bytes loaded or stored (e.g. 1, 2, 4, 8). If
>> +  // the base registers are identical and the offset of the lower access
>> +  // plus its width does not reach the offset of the higher access, then
>> +  // the two accesses are disjoint.
>> +  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
>> +      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
>> +    if (BaseRegA == BaseRegB) {
>
> I think that this makes sense only for SSA virtual registers, but not for
> physical ones (because the value in a physical register might have changed
> in between the two instructions). If I'm right, then you'll want to
> exclude physical registers here (at least any that might be modified
> during the function's execution).
>
> -Hal
>
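[A minimal sketch of the guard Hal suggests, assuming the existing static helper TargetRegisterInfo::isVirtualRegister is the right predicate here; untested, illustrative only:

    // Only trust matching base registers when they are SSA virtual
    // registers; a physical register may be redefined between the two
    // instructions, so equal register numbers need not mean equal values.
    if (BaseRegA == BaseRegB &&
        TargetRegisterInfo::isVirtualRegister(BaseRegA)) {
      // ... existing offset/width disjointness check ...
    }
]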
>> +      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
>> +      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
>> +      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
>> +      if (LowOffset + LowWidth <= HighOffset)
>> +        return true;
>> +    }
>> +  }
>> +  return false;
>> +}
>> +
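[The disjointness test above is a standard interval check: each access covers the half-open byte range [Offset, Offset+Width), and two ranges off the same base are disjoint iff the lower one ends at or before the higher one begins. A self-contained illustration (not LLVM code; names invented):

    #include <algorithm>
    #include <cassert>

    // Mirrors the check above: the lower access must end at or before
    // the higher access begins.
    static bool disjoint(int OffsetA, int WidthA, int OffsetB, int WidthB) {
      int LowOffset = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      return LowOffset + LowWidth <= HighOffset;
    }

    int main() {
      assert(disjoint(0, 8, 8, 4));    // [0,8) vs [8,12): disjoint
      assert(!disjoint(4, 8, 8, 4));   // [4,12) vs [8,12): overlap
      return 0;
    }
]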
>>  /// analyzeCompare - For a comparison instruction, return the source
>>  /// registers in SrcReg and SrcReg2, and the value it compares against in
>>  /// CmpValue. Return true if the comparison instruction can be analyzed.
>> @@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(M
>>    };
>>  }
>>
>> +bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
>> +    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
>> +    const TargetRegisterInfo *TRI) const {
>> +  // Handle only loads/stores with a base register followed by an immediate
>> +  // offset.
>> +  if (LdSt->getNumOperands() != 3)
>> +    return false;
>> +  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
>> +    return false;
>> +
>> +  // Offset is calculated as the immediate operand multiplied by the scaling
>> +  // factor. Unscaled instructions have a scaling factor of 1.
>> +  int Scale = 0;
>> +  switch (LdSt->getOpcode()) {
>> +  default:
>> +    return false;
>> +  case AArch64::LDURQi:
>> +  case AArch64::STURQi:
>> +    Width = 16;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURXi:
>> +  case AArch64::LDURDi:
>> +  case AArch64::STURXi:
>> +  case AArch64::STURDi:
>> +    Width = 8;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURWi:
>> +  case AArch64::LDURSi:
>> +  case AArch64::LDURSWi:
>> +  case AArch64::STURWi:
>> +  case AArch64::STURSi:
>> +    Width = 4;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURHi:
>> +  case AArch64::LDURHHi:
>> +  case AArch64::LDURSHXi:
>> +  case AArch64::LDURSHWi:
>> +  case AArch64::STURHi:
>> +  case AArch64::STURHHi:
>> +    Width = 2;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURBi:
>> +  case AArch64::LDURBBi:
>> +  case AArch64::LDURSBXi:
>> +  case AArch64::LDURSBWi:
>> +  case AArch64::STURBi:
>> +  case AArch64::STURBBi:
>> +    Width = 1;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDRXui:
>> +  case AArch64::STRXui:
>> +    Scale = Width = 8;
>> +    break;
>> +  case AArch64::LDRWui:
>> +  case AArch64::STRWui:
>> +    Scale = Width = 4;
>> +    break;
>> +  case AArch64::LDRBui:
>> +  case AArch64::STRBui:
>> +    Scale = Width = 1;
>> +    break;
>> +  case AArch64::LDRHui:
>> +  case AArch64::STRHui:
>> +    Scale = Width = 2;
>> +    break;
>> +  case AArch64::LDRSui:
>> +  case AArch64::STRSui:
>> +    Scale = Width = 4;
>> +    break;
>> +  case AArch64::LDRDui:
>> +  case AArch64::STRDui:
>> +    Scale = Width = 8;
>> +    break;
>> +  case AArch64::LDRQui:
>> +  case AArch64::STRQui:
>> +    Scale = Width = 16;
>> +    break;
>> +  case AArch64::LDRBBui:
>> +  case AArch64::STRBBui:
>> +    Scale = Width = 1;
>> +    break;
>> +  case AArch64::LDRHHui:
>> +  case AArch64::STRHHui:
>> +    Scale = Width = 2;
>> +    break;
>> +  };
>> +
>> +  BaseReg = LdSt->getOperand(1).getReg();
>> +  Offset = LdSt->getOperand(2).getImm() * Scale;
>> +  return true;
>> +}
>> +
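[To make the Scale/Width distinction concrete: the scaled LDR/STR "ui" forms encode the byte offset divided by the access size, while the unscaled LDUR/STUR forms carry the byte offset directly. A small self-contained example (illustrative only):

    #include <cassert>

    // Byte offset recovered from the encoded immediate, as in the switch
    // above: Offset = Imm * Scale.
    static int byteOffset(int Imm, int Scale) { return Imm * Scale; }

    int main() {
      // STRWui (Scale == Width == 4): "str w0, [x1, #12]" encodes Imm == 3.
      assert(byteOffset(3, 4) == 12);
      // STURWi (Scale == 1): "stur w0, [x1, #12]" encodes Imm == 12.
      assert(byteOffset(12, 1) == 12);
      return 0;
    }
]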
>>  /// Detect opportunities for ldp/stp formation.
>>  ///
>>  /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
>>
>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Mon Sep 8 09:43:48 2014
>> @@ -52,6 +52,10 @@ public:
>>    bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
>>                               unsigned &DstReg, unsigned &SubIdx) const override;
>>
>> +  bool
>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>> +                                  AliasAnalysis *AA = nullptr) const override;
>> +
>>    unsigned isLoadFromStackSlot(const MachineInstr *MI,
>>                                 int &FrameIndex) const override;
>>    unsigned isStoreToStackSlot(const MachineInstr *MI,
>> @@ -90,6 +94,10 @@ public:
>>                               unsigned &Offset,
>>                               const TargetRegisterInfo *TRI) const override;
>>
>> +  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
>> +                                 int &Offset, int &Width,
>> +                                 const TargetRegisterInfo *TRI) const;
>> +
>>    bool enableClusterLoads() const override { return true; }
>>
>>    bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
>>
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll?rev=217371&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll (added)
>> +++ llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll Mon Sep 8 09:43:48 2014
>> @@ -0,0 +1,31 @@
>> +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
>> +; Check that the scheduler moves the load from a[1] past the store into a[2].
>> +@a = common global i32* null, align 8
>> +@m = common global i32 0, align 4
>> +
>> +; Function Attrs: nounwind
>> +define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
>> +entry:
>> +; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
>> +; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
>> +  %0 = load i32** @a, align 8, !tbaa !1
>> +  %arrayidx = getelementptr inbounds i32* %0, i64 2
>> +  store i32 %i, i32* %arrayidx, align 4, !tbaa !5
>> +  %arrayidx1 = getelementptr inbounds i32* %0, i64 1
>> +  %1 = load i32* %arrayidx1, align 4, !tbaa !5
>> +  %add = add nsw i32 %k, %i
>> +  store i32 %add, i32* @m, align 4, !tbaa !5
>> +  ret i32 %1
>> +}
>> +
>> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
>> +
>> +!llvm.ident = !{!0}
>> +
>> +!0 = metadata !{metadata !"clang version 3.6.0 "}
>> +!1 = metadata !{metadata !2, metadata !2, i64 0}
>> +!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
>> +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
>> +!4 = metadata !{metadata !"Simple C/C++ TBAA"}
>> +!5 = metadata !{metadata !6, metadata !6, i64 0}
>> +!6 = metadata !{metadata !"int", metadata !3, i64 0}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
> --
> Hal Finkel
> Assistant Computational Scientist
> Leadership Computing Facility
> Argonne National Laboratory
>