[llvm] r217371 - [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.

Chad Rosier mcrosier at codeaurora.org
Wed Sep 10 10:20:13 PDT 2014


Sanjin,
Matt has recanted his comment, so feel free to ignore the previous email.

 Chad

>
> On Sep 9, 2014, at 6:03 PM, Matt Arsenault <arsenm2 at gmail.com> wrote:
>
>>
>> On Sep 8, 2014, at 10:43 AM, Chad Rosier <mcrosier at codeaurora.org>
>> wrote:
>>
>>> Author: mcrosier
>>> Date: Mon Sep  8 09:43:48 2014
>>> New Revision: 217371
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=217371&view=rev
>>> Log:
>>> [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling
>>> graph.
>>>
>>> Patch by Sanjin Sijaric <ssijaric at codeaurora.org>!
>>> Phabricator Review: http://reviews.llvm.org/D5103
>>>
>>> Added:
>>>   llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>>> Modified:
>>>   llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>>>   llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>>>   llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>>>   llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>>>
>>> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
>>> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Mon Sep  8 09:43:48 2014
>>> @@ -1192,6 +1192,20 @@ public:
>>>    return nullptr;
>>>  }
>>>
>>> +  // areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the target
>>> +  // to tell, even without aliasing information, that two MIs access different
>>> +  // memory addresses. This function returns true if two MIs access different
>>> +  // memory addresses, and false otherwise.
>>> +  virtual bool
>>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>>> +                                  AliasAnalysis *AA = nullptr) const {
>>> +    assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>>> +           "MIa must load from or modify a memory location");
>>> +    assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>>> +           "MIb must load from or modify a memory location");
>>> +    return false;
>>> +  }
>>> +
>>> private:
>>>  int CallFrameSetupOpcode, CallFrameDestroyOpcode;
>>> };
>>>
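
An aside on what the new hook is being asked to prove: the property is plain
interval disjointness over (base register, offset, width) triples. A minimal
standalone sketch of that test (illustrative only, not part of the patch):

    #include <algorithm>

    // Two fixed-width accesses off the same base register are disjoint
    // when the lower access ends at or before the higher one begins.
    static bool accessesAreDisjoint(int OffsetA, int WidthA,
                                    int OffsetB, int WidthB) {
      int LowOffset  = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
      int LowWidth   = (LowOffset == OffsetA) ? WidthA : WidthB;
      return LowOffset + LowWidth <= HighOffset;
    }

For example, a 4-byte load at [x0, #4] and a 4-byte store at [x0, #8] give
4 + 4 <= 8, so no chain edge is needed between them.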
>>> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
>>> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Mon Sep  8 09:43:48 2014
>>> @@ -511,9 +511,18 @@ static inline bool isUnsafeMemoryObject(
>>> static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
>>>                              MachineInstr *MIa,
>>>                              MachineInstr *MIb) {
>>> +  const MachineFunction *MF = MIa->getParent()->getParent();
>>> +  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
>>> +
>>>  // Cover a trivial case - no edge is needed to itself.
>>>  if (MIa == MIb)
>>>    return false;
>>> +
>>> +  // Let the target decide if memory accesses cannot possibly overlap.
>>> +  if ((MIa->mayLoad() || MIa->mayStore()) &&
>>> +      (MIb->mayLoad() || MIb->mayStore()))
>>> +    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
>>> +      return false;
>>>
>>>  // FIXME: Need to handle multiple memory operands to support all targets.
>>>  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
>>>
>>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
>>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Sep  8 09:43:48 2014
>>> @@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtI
>>>  }
>>> }
>>>
>>> +bool
>>> +AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
>>> +                                                  MachineInstr *MIb,
>>> +                                                  AliasAnalysis *AA) const {
>>> +  const TargetRegisterInfo *TRI = &getRegisterInfo();
>>> +  unsigned BaseRegA = 0, BaseRegB = 0;
>>> +  int OffsetA = 0, OffsetB = 0;
>>> +  int WidthA = 0, WidthB = 0;
>>> +
>>> +  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>>> +         "MIa must be a store or a load");
>>> +  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>>> +         "MIb must be a store or a load");
>>> +
>>> +  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
>>> +      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
>>> +    return false;
>>> +
>>> +  // Retrieve the base register, the offset from the base register, and the
>>> +  // width, i.e. the number of bytes loaded/stored (e.g. 1, 2, 4, 8). If the
>>> +  // base registers are identical, and the lower access's offset plus its
>>> +  // width does not extend past the higher access's offset, then the two
>>> +  // accesses cannot overlap.
>>> +  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
>>> +      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
>>> +    if (BaseRegA == BaseRegB) {
>>> +      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
>>> +      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
>>> +      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
>>> +      if (LowOffset + LowWidth <= HighOffset)
>>> +        return true;
>>> +    }
>>> +  }
>>> +  return false;
>>> +}
>>> +
>>> /// analyzeCompare - For a comparison instruction, return the source registers
>>> /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
>>> /// Return true if the comparison instruction can be analyzed.
>>> @@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(M
>>>  };
>>> }
>>>
>>
>>
>> Why not add the width argument to the standard getLdStBaseRegImmOfs?
>> Other targets that want to implement areMemAccessesTriviallyDisjoint
>> are going to want the same information. The logic for figuring out
>> overlapping offsets is also likely to be the same, so maybe that part
>> should be pulled up into the caller.
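
For concreteness, the shape being suggested might look something like this
(a hypothetical sketch, not actual LLVM API -- the Width out-parameter and
the hoisted caller-side test are the assumptions here):

    // Hypothetical: extend the existing hook to also report the access width.
    virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                      unsigned &Offset, int &Width,
                                      const TargetRegisterInfo *TRI) const;

    // Target-independent caller, e.g. in MIsNeedChainEdge:
    if (TII->getLdStBaseRegImmOfs(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
        TII->getLdStBaseRegImmOfs(MIb, BaseRegB, OffsetB, WidthB, TRI) &&
        BaseRegA == BaseRegB) {
      int LowOffset  = std::min((int)OffsetA, (int)OffsetB);
      int HighOffset = std::max((int)OffsetA, (int)OffsetB);
      int LowWidth   = (LowOffset == (int)OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return false; // provably disjoint, no chain edge needed
    }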
>
> Actually, I don’t think the width argument is necessary. I’ll be posting
> a patch later today that implements this for R600, and I just used the
> MachineMemOperand to get the size, since that was easier than parsing it
> out of the instruction.
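
A minimal sketch of that MachineMemOperand route (illustrative; it assumes
the instruction carries exactly one memory operand):

    // The access size is already recorded on the memory operand, so no
    // per-opcode switch over load/store opcodes is needed.
    if (MI->hasOneMemOperand()) {
      const MachineMemOperand *MMO = *MI->memoperands_begin();
      uint64_t Width = MMO->getSize(); // access size in bytes
      // ... feed Width into the disjointness test ...
    }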
>
>
>>
>>
>>> +bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
>>> +    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
>>> +    const TargetRegisterInfo *TRI) const {
>>> +  // Handle only loads/stores with base register followed by immediate offset.
>>> +  if (LdSt->getNumOperands() != 3)
>>> +    return false;
>>> +  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
>>> +    return false;
>>> +
>>> +  // Offset is calculated as the immediate operand multiplied by the scaling factor.
>>> +  // Unscaled instructions have scaling factor set to 1.
>>> +  int Scale = 0;
>>> +  switch (LdSt->getOpcode()) {
>>> +  default:
>>> +    return false;
>>> +  case AArch64::LDURQi:
>>> +  case AArch64::STURQi:
>>> +    Width = 16;
>>> +    Scale = 1;
>>> +    break;
>>> +  case AArch64::LDURXi:
>>> +  case AArch64::LDURDi:
>>> +  case AArch64::STURXi:
>>> +  case AArch64::STURDi:
>>> +    Width = 8;
>>> +    Scale = 1;
>>> +    break;
>>> +  case AArch64::LDURWi:
>>> +  case AArch64::LDURSi:
>>> +  case AArch64::LDURSWi:
>>> +  case AArch64::STURWi:
>>> +  case AArch64::STURSi:
>>> +    Width = 4;
>>> +    Scale = 1;
>>> +    break;
>>> +  case AArch64::LDURHi:
>>> +  case AArch64::LDURHHi:
>>> +  case AArch64::LDURSHXi:
>>> +  case AArch64::LDURSHWi:
>>> +  case AArch64::STURHi:
>>> +  case AArch64::STURHHi:
>>> +    Width = 2;
>>> +    Scale = 1;
>>> +    break;
>>> +  case AArch64::LDURBi:
>>> +  case AArch64::LDURBBi:
>>> +  case AArch64::LDURSBXi:
>>> +  case AArch64::LDURSBWi:
>>> +  case AArch64::STURBi:
>>> +  case AArch64::STURBBi:
>>> +    Width = 1;
>>> +    Scale = 1;
>>> +    break;
>>> +  case AArch64::LDRXui:
>>> +  case AArch64::STRXui:
>>> +    Scale = Width = 8;
>>> +    break;
>>> +  case AArch64::LDRWui:
>>> +  case AArch64::STRWui:
>>> +    Scale = Width = 4;
>>> +    break;
>>> +  case AArch64::LDRBui:
>>> +  case AArch64::STRBui:
>>> +    Scale = Width = 1;
>>> +    break;
>>> +  case AArch64::LDRHui:
>>> +  case AArch64::STRHui:
>>> +    Scale = Width = 2;
>>> +    break;
>>> +  case AArch64::LDRSui:
>>> +  case AArch64::STRSui:
>>> +    Scale = Width = 4;
>>> +    break;
>>> +  case AArch64::LDRDui:
>>> +  case AArch64::STRDui:
>>> +    Scale = Width = 8;
>>> +    break;
>>> +  case AArch64::LDRQui:
>>> +  case AArch64::STRQui:
>>> +    Scale = Width = 16;
>>> +    break;
>>> +  case AArch64::LDRBBui:
>>> +  case AArch64::STRBBui:
>>> +    Scale = Width = 1;
>>> +    break;
>>> +  case AArch64::LDRHHui:
>>> +  case AArch64::STRHHui:
>>> +    Scale = Width = 2;
>>> +    break;
>>> +  };
>>> +
>>> +  BaseReg = LdSt->getOperand(1).getReg();
>>> +  Offset = LdSt->getOperand(2).getImm() * Scale;
>>> +  return true;
>>> +}
>>> +
>>> /// Detect opportunities for ldp/stp formation.
>>> ///
>>> /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
>>>
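
A quick example of the scaling above: for "ldr w1, [x0, #12]" (LDRWui), the
machine instruction's immediate operand is 3, so the routine reports Offset =
3 * 4 = 12 and Width = 4; the unscaled "ldur w1, [x0, #12]" (LDURWi) encodes
the byte offset 12 directly, with Scale = 1.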
>>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
>>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Mon Sep  8 09:43:48 2014
>>> @@ -52,6 +52,10 @@ public:
>>>  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
>>>                             unsigned &DstReg, unsigned &SubIdx) const override;
>>>
>>> +  bool
>>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>>> +                                  AliasAnalysis *AA = nullptr) const override;
>>> +
>>>  unsigned isLoadFromStackSlot(const MachineInstr *MI,
>>>                               int &FrameIndex) const override;
>>>  unsigned isStoreToStackSlot(const MachineInstr *MI,
>>> @@ -90,6 +94,10 @@ public:
>>>                            unsigned &Offset,
>>>                            const TargetRegisterInfo *TRI) const override;
>>>
>>> +  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
>>> +                                 int &Offset, int &Width,
>>> +                                 const TargetRegisterInfo *TRI) const;
>>> +
>>>  bool enableClusterLoads() const override { return true; }
>>>
>>>  bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
>>>
>>> Added:
>>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll?rev=217371&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll (added)
>>> +++ llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll Mon Sep  8 09:43:48 2014
>>> @@ -0,0 +1,31 @@
>>> +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
>>> +; Check that the scheduler moves the load from a[1] past the store into a[2].
>>> +@a = common global i32* null, align 8
>>> +@m = common global i32 0, align 4
>>> +
>>> +; Function Attrs: nounwind
>>> +define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
>>> +entry:
>>> +; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
>>> +; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
>>> +  %0 = load i32** @a, align 8, !tbaa !1
>>> +  %arrayidx = getelementptr inbounds i32* %0, i64 2
>>> +  store i32 %i, i32* %arrayidx, align 4, !tbaa !5
>>> +  %arrayidx1 = getelementptr inbounds i32* %0, i64 1
>>> +  %1 = load i32* %arrayidx1, align 4, !tbaa !5
>>> +  %add = add nsw i32 %k, %i
>>> +  store i32 %add, i32* @m, align 4, !tbaa !5
>>> +  ret i32 %1
>>> +}
>>> +
>>> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
>>> +
>>> +!llvm.ident = !{!0}
>>> +
>>> +!0 = metadata !{metadata !"clang version 3.6.0 "}
>>> +!1 = metadata !{metadata !2, metadata !2, i64 0}
>>> +!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
>>> +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
>>> +!4 = metadata !{metadata !"Simple C/C++ TBAA"}
>>> +!5 = metadata !{metadata !6, metadata !6, i64 0}
>>> +!6 = metadata !{metadata !"int", metadata !3, i64 0}
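
(The test exercises exactly the interval check added above: both accesses
share base register x[[REG]], the load covers bytes [4, 8) and the store
bytes [8, 12) past that base, and 4 + 4 <= 8, so the two are trivially
disjoint and the scheduler is free to hoist the load past the store.)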
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
