[llvm] r217371 - [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
Chad Rosier
mcrosier at codeaurora.org
Mon Sep 29 06:47:12 PDT 2014
+Sanjin
Adding Sanjin since this is his work.
Chad
> ----- Original Message -----
>> From: "Chad Rosier" <mcrosier at codeaurora.org>
>> To: llvm-commits at cs.uiuc.edu
>> Sent: Monday, September 8, 2014 9:43:48 AM
>> Subject: [llvm] r217371 - [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
>>
>> Author: mcrosier
>> Date: Mon Sep 8 09:43:48 2014
>> New Revision: 217371
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=217371&view=rev
>> Log:
>> [AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
>>
>> Patch by Sanjin Sijaric <ssijaric at codeaurora.org>!
>> Phabricator Review: http://reviews.llvm.org/D5103
>>
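[For readers skimming the patch: the test case added below corresponds roughly to the following C++ source (illustrative only; the function and variable names are invented). Two accesses at distinct constant offsets from the same base pointer occupy disjoint byte ranges, so the scheduler is free to reorder them.

    // Hypothetical source for the pattern the patch targets: the load of
    // a[1] touches bytes [a+4, a+8) and the store to a[2] touches bytes
    // [a+8, a+12), so the load may be scheduled above the store.
    int reorder(int *a, int i) {
      a[2] = i;       // becomes: str w?, [x?, #8]
      return a[1];    // becomes: ldr w?, [x?, #4]
    }
]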
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>> Modified:
>> llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>> llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>>
>> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
>> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Mon Sep 8 09:43:48 2014
>> @@ -1192,6 +1192,20 @@ public:
>>      return nullptr;
>>    }
>>
>> +  // areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the
>> +  // target to tell, even without aliasing information, that two MIs
>> +  // access different memory addresses. This function returns true if two
>> +  // MIs access different memory addresses, and false otherwise.
>> +  virtual bool
>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>> +                                  AliasAnalysis *AA = nullptr) const {
>> +    assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>> +           "MIa must load from or modify a memory location");
>> +    assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>> +           "MIb must load from or modify a memory location");
>> +    return false;
>> +  }
>> +
>>  private:
>>    int CallFrameSetupOpcode, CallFrameDestroyOpcode;
>>  };
>>
>> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Mon Sep 8 09:43:48 2014
>> @@ -511,9 +511,18 @@ static inline bool isUnsafeMemoryObject(
>>  static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
>>                               MachineInstr *MIa, MachineInstr *MIb) {
>> +  const MachineFunction *MF = MIa->getParent()->getParent();
>> +  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
>> +
>>    // Cover a trivial case - no edge is needed to itself.
>>    if (MIa == MIb)
>>      return false;
>> +
>> +  // Let the target decide if memory accesses cannot possibly overlap.
>> +  if ((MIa->mayLoad() || MIa->mayStore()) &&
>> +      (MIb->mayLoad() || MIb->mayStore()))
>> +    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
>> +      return false;
>>
>>    // FIXME: Need to handle multiple memory operands to support all targets.
>>    if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
>>
>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Sep 8 09:43:48 2014
>> @@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtI
>>    }
>>  }
>>
>> +bool
>> +AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
>> +                                                  MachineInstr *MIb,
>> +                                                  AliasAnalysis *AA) const {
>> +  const TargetRegisterInfo *TRI = &getRegisterInfo();
>> +  unsigned BaseRegA = 0, BaseRegB = 0;
>> +  int OffsetA = 0, OffsetB = 0;
>> +  int WidthA = 0, WidthB = 0;
>> +
>> +  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
>> +         "MIa must be a store or a load");
>> +  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
>> +         "MIb must be a store or a load");
>> +
>> +  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
>> +      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
>> +    return false;
>> +
>> +  // Retrieve the base register, the offset from the base register, and the
>> +  // width, i.e. the number of bytes loaded or stored (e.g. 1, 2, 4, 8). If
>> +  // the base registers are identical and the offset of the lower access
>> +  // plus its width does not reach the offset of the higher access, then
>> +  // the two accesses are disjoint.
>> +  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
>> +      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
>> +    if (BaseRegA == BaseRegB) {
>
> I think that this makes sense only for SSA virtual registers, but not for
> physical ones (because the value in a physical register might have changed
> in between the two instructions). If I'm right, then you'll want to
> exclude physical registers here (at least any that might be modified
> during the function's execution).
>
> -Hal
>
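[A minimal sketch of the guard Hal suggests, assuming the existing static helper TargetRegisterInfo::isVirtualRegister is the right predicate here; untested, illustrative only:

    // Only trust matching base registers when they are SSA virtual
    // registers; a physical register may be redefined between the two
    // instructions, so equal register numbers need not mean equal values.
    if (BaseRegA == BaseRegB &&
        TargetRegisterInfo::isVirtualRegister(BaseRegA)) {
      // ... existing offset/width disjointness check ...
    }
]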
>> +      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
>> +      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
>> +      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
>> +      if (LowOffset + LowWidth <= HighOffset)
>> +        return true;
>> +    }
>> +  }
>> +  return false;
>> +}
>> +
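[The disjointness test above is a standard interval check: each access covers the half-open byte range [Offset, Offset+Width), and two ranges off the same base are disjoint iff the lower one ends at or before the higher one begins. A self-contained illustration (not LLVM code; names invented):

    #include <algorithm>
    #include <cassert>

    // Mirrors the check above: the lower access must end at or before
    // the higher access begins.
    static bool disjoint(int OffsetA, int WidthA, int OffsetB, int WidthB) {
      int LowOffset = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      return LowOffset + LowWidth <= HighOffset;
    }

    int main() {
      assert(disjoint(0, 8, 8, 4));    // [0,8) vs [8,12): disjoint
      assert(!disjoint(4, 8, 8, 4));   // [4,12) vs [8,12): overlap
      return 0;
    }
]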
>>  /// analyzeCompare - For a comparison instruction, return the source
>>  /// registers in SrcReg and SrcReg2, and the value it compares against in
>>  /// CmpValue. Return true if the comparison instruction can be analyzed.
>> @@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(M
>>    };
>>  }
>>
>> +bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
>> +    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
>> +    const TargetRegisterInfo *TRI) const {
>> +  // Handle only loads/stores with a base register followed by an immediate
>> +  // offset.
>> +  if (LdSt->getNumOperands() != 3)
>> +    return false;
>> +  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
>> +    return false;
>> +
>> +  // Offset is calculated as the immediate operand multiplied by the scaling
>> +  // factor. Unscaled instructions have a scaling factor of 1.
>> +  int Scale = 0;
>> +  switch (LdSt->getOpcode()) {
>> +  default:
>> +    return false;
>> +  case AArch64::LDURQi:
>> +  case AArch64::STURQi:
>> +    Width = 16;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURXi:
>> +  case AArch64::LDURDi:
>> +  case AArch64::STURXi:
>> +  case AArch64::STURDi:
>> +    Width = 8;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURWi:
>> +  case AArch64::LDURSi:
>> +  case AArch64::LDURSWi:
>> +  case AArch64::STURWi:
>> +  case AArch64::STURSi:
>> +    Width = 4;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURHi:
>> +  case AArch64::LDURHHi:
>> +  case AArch64::LDURSHXi:
>> +  case AArch64::LDURSHWi:
>> +  case AArch64::STURHi:
>> +  case AArch64::STURHHi:
>> +    Width = 2;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDURBi:
>> +  case AArch64::LDURBBi:
>> +  case AArch64::LDURSBXi:
>> +  case AArch64::LDURSBWi:
>> +  case AArch64::STURBi:
>> +  case AArch64::STURBBi:
>> +    Width = 1;
>> +    Scale = 1;
>> +    break;
>> +  case AArch64::LDRXui:
>> +  case AArch64::STRXui:
>> +    Scale = Width = 8;
>> +    break;
>> +  case AArch64::LDRWui:
>> +  case AArch64::STRWui:
>> +    Scale = Width = 4;
>> +    break;
>> +  case AArch64::LDRBui:
>> +  case AArch64::STRBui:
>> +    Scale = Width = 1;
>> +    break;
>> +  case AArch64::LDRHui:
>> +  case AArch64::STRHui:
>> +    Scale = Width = 2;
>> +    break;
>> +  case AArch64::LDRSui:
>> +  case AArch64::STRSui:
>> +    Scale = Width = 4;
>> +    break;
>> +  case AArch64::LDRDui:
>> +  case AArch64::STRDui:
>> +    Scale = Width = 8;
>> +    break;
>> +  case AArch64::LDRQui:
>> +  case AArch64::STRQui:
>> +    Scale = Width = 16;
>> +    break;
>> +  case AArch64::LDRBBui:
>> +  case AArch64::STRBBui:
>> +    Scale = Width = 1;
>> +    break;
>> +  case AArch64::LDRHHui:
>> +  case AArch64::STRHHui:
>> +    Scale = Width = 2;
>> +    break;
>> +  };
>> +
>> +  BaseReg = LdSt->getOperand(1).getReg();
>> +  Offset = LdSt->getOperand(2).getImm() * Scale;
>> +  return true;
>> +}
>> +
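[To make the Scale/Width distinction concrete: the scaled LDR/STR "ui" forms encode the byte offset divided by the access size, while the unscaled LDUR/STUR forms carry the byte offset directly. A small self-contained example (illustrative only):

    #include <cassert>

    // Byte offset recovered from the encoded immediate, as in the switch
    // above: Offset = Imm * Scale.
    static int byteOffset(int Imm, int Scale) { return Imm * Scale; }

    int main() {
      // STRWui (Scale == Width == 4): "str w0, [x1, #12]" encodes Imm == 3.
      assert(byteOffset(3, 4) == 12);
      // STURWi (Scale == 1): "stur w0, [x1, #12]" encodes Imm == 12.
      assert(byteOffset(12, 1) == 12);
      return 0;
    }
]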
>>  /// Detect opportunities for ldp/stp formation.
>>  ///
>>  /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
>>
>> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=217371&r1=217370&r2=217371&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
>> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Mon Sep 8 09:43:48 2014
>> @@ -52,6 +52,10 @@ public:
>>    bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
>>                               unsigned &DstReg, unsigned &SubIdx) const override;
>>
>> +  bool
>> +  areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
>> +                                  AliasAnalysis *AA = nullptr) const override;
>> +
>>    unsigned isLoadFromStackSlot(const MachineInstr *MI,
>>                                 int &FrameIndex) const override;
>>    unsigned isStoreToStackSlot(const MachineInstr *MI,
>> @@ -90,6 +94,10 @@ public:
>>                               unsigned &Offset,
>>                               const TargetRegisterInfo *TRI) const override;
>>
>> +  bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
>> +                                 int &Offset, int &Width,
>> +                                 const TargetRegisterInfo *TRI) const;
>> +
>>    bool enableClusterLoads() const override { return true; }
>>
>>    bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
>>
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll?rev=217371&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll (added)
>> +++ llvm/trunk/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll Mon Sep 8 09:43:48 2014
>> @@ -0,0 +1,31 @@
>> +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
>> +; Check that the scheduler moves the load from a[1] past the store into a[2].
>> +@a = common global i32* null, align 8
>> +@m = common global i32 0, align 4
>> +
>> +; Function Attrs: nounwind
>> +define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
>> +entry:
>> +; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
>> +; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
>> +  %0 = load i32** @a, align 8, !tbaa !1
>> +  %arrayidx = getelementptr inbounds i32* %0, i64 2
>> +  store i32 %i, i32* %arrayidx, align 4, !tbaa !5
>> +  %arrayidx1 = getelementptr inbounds i32* %0, i64 1
>> +  %1 = load i32* %arrayidx1, align 4, !tbaa !5
>> +  %add = add nsw i32 %k, %i
>> +  store i32 %add, i32* @m, align 4, !tbaa !5
>> +  ret i32 %1
>> +}
>> +
>> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
>> +
>> +!llvm.ident = !{!0}
>> +
>> +!0 = metadata !{metadata !"clang version 3.6.0 "}
>> +!1 = metadata !{metadata !2, metadata !2, i64 0}
>> +!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
>> +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
>> +!4 = metadata !{metadata !"Simple C/C++ TBAA"}
>> +!5 = metadata !{metadata !6, metadata !6, i64 0}
>> +!6 = metadata !{metadata !"int", metadata !3, i64 0}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
> --
> Hal Finkel
> Assistant Computational Scientist
> Leadership Computing Facility
> Argonne National Laboratory
>