[llvm-branch-commits] [llvm] a9f14cd - [ARM] Add bank conflict hazarding

David Green via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Dec 23 06:06:00 PST 2020


Author: David Penry
Date: 2020-12-23T14:00:59Z
New Revision: a9f14cdc6203c05425f8b17228ff368f7fd9ae29

URL: https://github.com/llvm/llvm-project/commit/a9f14cdc6203c05425f8b17228ff368f7fd9ae29
DIFF: https://github.com/llvm/llvm-project/commit/a9f14cdc6203c05425f8b17228ff368f7fd9ae29.diff

LOG: [ARM] Add bank conflict hazarding

Adds ARMBankConflictHazardRecognizer. This hazard recognizer
looks for a few situations where the same base pointer is used and
then checks whether the offsets lead to a bank conflict. Two
parameters are also added to permit overriding of the target
assumptions:

arm-data-bank-mask=<int> - Mask of bits which are to be checked for
conflicts.  If all these bits are equal in the offsets, there is a
conflict.
arm-assume-itcm-bankconflict=<bool> - Assume that there will be bank
conflicts on any loads to a constant pool.

This hazard recognizer is enabled for Cortex-M7, where the Technical
Reference Manual states that there are two DTCM banks banked using bit
2 and one ITCM bank.

Differential Revision: https://reviews.llvm.org/D93054

Added: 
    llvm/test/CodeGen/Thumb2/schedm7-hazard.ll

Modified: 
    llvm/lib/Target/ARM/ARM.td
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/lib/Target/ARM/ARMBaseInstrInfo.h
    llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
    llvm/lib/Target/ARM/ARMHazardRecognizer.h
    llvm/lib/Target/ARM/ARMSubtarget.cpp
    llvm/lib/Target/ARM/ARMSubtarget.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 8111346c74f6..5d626e7d8e5a 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -660,7 +660,8 @@ def ProcR52     : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
 
 def ProcM3      : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
                                    "Cortex-M3 ARM processors", []>;
-
+def ProcM7      : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7",
+                                   "Cortex-M7 ARM processors", []>;
 
 //===----------------------------------------------------------------------===//
 // ARM Helper classes.
@@ -1191,6 +1192,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model,        [ARMv7em,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcessorModel<"cortex-m7", CortexM7Model,        [ARMv7em,
+                                                         ProcM7,
                                                          FeatureFPARMv8_D16,
                                                          FeatureUseMISched]>;
 

diff  --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index def631276950..563f2d38edf0 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -134,6 +134,31 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
   return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
 }
 
+// Called during:
+// - pre-RA scheduling
+// - post-RA scheduling when FeatureUseMISched is set
+ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
+    const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
+
+  // We would like to restrict this hazard recognizer to only
+  // post-RA scheduling; we can tell that we're post-RA because we don't
+  // track VRegLiveness.
+  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
+  //            banks banked on bit 2.  Assume that TCMs are in use.
+  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
+    MHR->AddHazardRecognizer(
+        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
+
+  // Not inserting ARMHazardRecognizerFPMLx because that would change
+  // legacy behavior
+
+  auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
+  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
+  return MHR;
+}
+
+// Called during post-RA scheduling when FeatureUseMISched is not set
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {

diff  --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 9b6572848ebe..deb008025b1d 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -131,6 +131,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
   CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                const ScheduleDAG *DAG) const override;
 
+  ScheduleHazardRecognizer *
+  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+                                 const ScheduleDAGMI *DAG) const override;
+
   ScheduleHazardRecognizer *
   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                      const ScheduleDAG *DAG) const override;

diff  --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 3cbc8da863c3..f083fa6662e9 100644
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -10,11 +10,19 @@
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
 using namespace llvm;
 
+static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1),
+                                 cl::Hidden);
+static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict",
+                                        cl::init(false), cl::Hidden);
+
 static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
                          const TargetRegisterInfo &TRI) {
   // FIXME: Detect integer instructions properly.
@@ -93,3 +101,168 @@ void ARMHazardRecognizerFPMLx::AdvanceCycle() {
 void ARMHazardRecognizerFPMLx::RecedeCycle() {
   llvm_unreachable("reverse ARM hazard checking unsupported");
 }
+
+///////// Bank conflicts handled as hazards //////////////
+
+static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
+                          int64_t &Offset) {
+
+  uint64_t TSFlags = MI.getDesc().TSFlags;
+  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+  unsigned IndexMode =
+      (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+
+  // Address mode tells us what we want to know about operands for T2
+  // instructions (but not size).  It tells us size (but not about operands)
+  // for T1 instructions.
+  switch (AddrMode) {
+  default:
+    return false;
+  case ARMII::AddrModeT2_i8:
+    // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8,
+    // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8,
+    // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8,
+    // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8,
+    // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8
+    BaseOp = &MI.getOperand(1);
+    Offset = (IndexMode == ARMII::IndexModePost)
+                 ? 0
+                 : (IndexMode == ARMII::IndexModePre ||
+                    IndexMode == ARMII::IndexModeUpd)
+                       ? MI.getOperand(3).getImm()
+                       : MI.getOperand(2).getImm();
+    return true;
+  case ARMII::AddrModeT2_i12:
+    // t2LDRBi12, t2LDRHi12
+    // t2LDRSBi12, t2LDRSHi12
+    // t2LDRi12
+    BaseOp = &MI.getOperand(1);
+    Offset = MI.getOperand(2).getImm();
+    return true;
+  case ARMII::AddrModeT2_i8s4:
+    // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8
+    BaseOp = &MI.getOperand(2);
+    Offset = (IndexMode == ARMII::IndexModePost)
+                 ? 0
+                 : (IndexMode == ARMII::IndexModePre ||
+                    IndexMode == ARMII::IndexModeUpd)
+                       ? MI.getOperand(4).getImm()
+                       : MI.getOperand(3).getImm();
+    return true;
+  case ARMII::AddrModeT1_1:
+    // tLDRBi, tLDRBr (watch out!), TLDRSB
+  case ARMII::AddrModeT1_2:
+    // tLDRHi, tLDRHr (watch out!), TLDRSH
+  case ARMII::AddrModeT1_4:
+    // tLDRi, tLDRr (watch out!)
+    BaseOp = &MI.getOperand(1);
+    Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0;
+    return MI.getOperand(2).isImm();
+  }
+  return false;
+}
+
+ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
+    const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
+    : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
+      DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask)
+                                                : CPUBankMask),
+      AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences()
+                                 ? AssumeITCMConflict
+                                 : CPUAssumeITCMConflict) {
+  MaxLookAhead = 1;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) {
+  return (((O0 ^ O1) & DataMask) != 0) ? NoHazard : Hazard;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  MachineInstr &L0 = *SU->getInstr();
+  if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1)
+    return NoHazard;
+
+  auto MO0 = *L0.memoperands().begin();
+  auto BaseVal0 = MO0->getValue();
+  auto BasePseudoVal0 = MO0->getPseudoValue();
+  int64_t Offset0 = 0;
+
+  if (MO0->getSize() > 4)
+    return NoHazard;
+
+  bool SPvalid = false;
+  const MachineOperand *SP = nullptr;
+  int64_t SPOffset0 = 0;
+
+  for (auto L1 : Accesses) {
+    auto MO1 = *L1->memoperands().begin();
+    auto BaseVal1 = MO1->getValue();
+    auto BasePseudoVal1 = MO1->getPseudoValue();
+    int64_t Offset1 = 0;
+
+    // Pointers to the same object
+    if (BaseVal0 && BaseVal1) {
+      const Value *Ptr0, *Ptr1;
+      Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true);
+      Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true);
+      if (Ptr0 == Ptr1 && Ptr0)
+        return CheckOffsets(Offset0, Offset1);
+    }
+
+    if (BasePseudoVal0 && BasePseudoVal1 &&
+        BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+        BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) {
+      // Spills/fills
+      auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0);
+      auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1);
+      Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex());
+      Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex());
+      return CheckOffsets(Offset0, Offset1);
+    }
+
+    // Constant pools (likely in ITCM)
+    if (BasePseudoVal0 && BasePseudoVal1 &&
+        BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+        BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict)
+      return Hazard;
+
+    // Is this a stack pointer-relative access?  We could in general try to
+    // use "is this the same register and is it unchanged?", but the
+    // memory operand tracking is highly likely to have already found that.
+    // What we're after here is bank conflicts between 
diff erent objects in
+    // the stack frame.
+    if (!SPvalid) { // set up SP
+      if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP)
+        SP = nullptr;
+      SPvalid = true;
+    }
+    if (SP) {
+      int64_t SPOffset1;
+      const MachineOperand *SP1;
+      if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP)
+        return CheckOffsets(SPOffset0, SPOffset1);
+    }
+  }
+
+  return NoHazard;
+}
+
+void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
+  MachineInstr &MI = *SU->getInstr();
+  if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1)
+    return;
+
+  auto MO = *MI.memoperands().begin();
+  uint64_t Size1 = MO->getSize();
+  if (Size1 > 4)
+    return;
+  Accesses.push_back(&MI);
+}
+
+void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); }

diff  --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/llvm/lib/Target/ARM/ARMHazardRecognizer.h
index 6d29e0c82063..c1f1bcd0a629 100644
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -13,10 +13,21 @@
 #ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
 #define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
 
+#include "ARMBaseInstrInfo.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/DataTypes.h"
+#include <array>
+#include <initializer_list>
 
 namespace llvm {
 
+class DataLayout;
+class MachineFunction;
+class MachineInstr;
+class ScheduleDAG;
+
 // Hazards related to FP MLx instructions
 class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
   MachineInstr *LastMI = nullptr;
@@ -32,6 +43,27 @@ class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
   void RecedeCycle() override;
 };
 
+// Hazards related to bank conflicts
+class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer {
+  SmallVector<MachineInstr *, 8> Accesses;
+  const MachineFunction &MF;
+  const DataLayout &DL;
+  int64_t DataMask;
+  bool AssumeITCMBankConflict;
+
+public:
+  ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM,
+                                  bool ABC);
+  HazardType getHazardType(SUnit *SU, int Stalls) override;
+  void Reset() override;
+  void EmitInstruction(SUnit *SU) override;
+  void AdvanceCycle() override;
+  void RecedeCycle() override;
+
+private:
+  inline HazardType CheckOffsets(unsigned O0, unsigned O1);
+};
+
 } // end namespace llvm
 
 #endif

diff  --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index d90346df67da..e2a3d302b103 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -299,6 +299,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   case CortexR5:
   case CortexR7:
   case CortexM3:
+  case CortexM7:
   case CortexR52:
   case CortexX1:
     break;

diff  --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 778d3ba22a2f..ac7248ac29c9 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -66,6 +66,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     CortexA8,
     CortexA9,
     CortexM3,
+    CortexM7,
     CortexR4,
     CortexR4F,
     CortexR5,
@@ -625,6 +626,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool isCortexA15() const { return ARMProcFamily == CortexA15; }
   bool isSwift()    const { return ARMProcFamily == Swift; }
   bool isCortexM3() const { return ARMProcFamily == CortexM3; }
+  bool isCortexM7() const { return ARMProcFamily == CortexM7; }
   bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
   bool isCortexR5() const { return ARMProcFamily == CortexR5; }
   bool isKrait() const { return ARMProcFamily == Krait; }

diff  --git a/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll
new file mode 100644
index 000000000000..9572300d8e22
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -arm-data-bank-mask=-1 | FileCheck %s --check-prefix=NOBANK
+
+; This tests the cortex-m7 bank conflict hazard recognizer.
+; Normally both loads would be scheduled early (both in the first cycle) due to
+; their latency. But will bank conflict to TCM so are scheduled in 
diff erent
+; cycles.
+
+define i32 @test(i32* %x0, i32 %y, i32 %z) {
+; CHECK-LABEL: test:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    ldr r3, [r0]
+; CHECK-NEXT:    subs r1, r3, r1
+; CHECK-NEXT:    ldr r0, [r0, #8]
+; CHECK-NEXT:    subs r1, r1, r2
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+; NOBANK-LABEL: test:
+; NOBANK:       @ %bb.0: @ %entry
+; NOBANK-NEXT:    ldr r3, [r0]
+; NOBANK-NEXT:    ldr r0, [r0, #8]
+; NOBANK-NEXT:    subs r1, r3, r1
+; NOBANK-NEXT:    subs r1, r1, r2
+; NOBANK-NEXT:    adds r1, #1
+; NOBANK-NEXT:    muls r0, r1, r0
+; NOBANK-NEXT:    bx lr
+entry:
+  %0 = load i32, i32* %x0, align 4
+  %mul3 = add nsw i32 %0, 1
+  %mul = sub nsw i32 %mul3, %y
+  %sub = sub nsw i32 %mul, %z
+  %arrayidx1 = getelementptr inbounds i32, i32* %x0, i32 2
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul2 = mul nsw i32 %sub, %1
+  ret i32 %mul2
+}


        


More information about the llvm-branch-commits mailing list