[llvm] [Xtensa] Implement support for the BranchRelaxation. (PR #113450)

Andrei Safronov via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 7 00:19:33 PST 2024


https://github.com/andreisfr updated https://github.com/llvm/llvm-project/pull/113450

>From 0460557bf6b82bfc4bd52c16477f2f44f96a4290 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Wed, 23 Oct 2024 18:26:31 +0300
Subject: [PATCH] [Xtensa] Implement branch relaxation.

Implement the insertIndirectBranch function and the other functions needed by the Branch Relaxation pass.
Also implement the estimateFunctionSizeInBytes function in the MachineFunction class.
---
 llvm/include/llvm/CodeGen/MachineFunction.h   |   4 +
 llvm/lib/CodeGen/MachineFunction.cpp          |  31 +++
 llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp   |   3 +
 .../lib/Target/Xtensa/XtensaFrameLowering.cpp |  21 +-
 llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp    | 207 +++++++++++++++
 llvm/lib/Target/Xtensa/XtensaInstrInfo.h      |  17 ++
 .../Target/Xtensa/XtensaMachineFunctionInfo.h |  42 +++
 llvm/lib/Target/Xtensa/XtensaRegisterInfo.h   |   4 +
 .../lib/Target/Xtensa/XtensaTargetMachine.cpp |  11 +
 llvm/lib/Target/Xtensa/XtensaTargetMachine.h  |   4 +
 llvm/test/CodeGen/Xtensa/branch-relaxation.ll | 246 ++++++++++++++++++
 llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll   |  12 +-
 12 files changed, 592 insertions(+), 10 deletions(-)
 create mode 100644 llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
 create mode 100644 llvm/test/CodeGen/Xtensa/branch-relaxation.ll

diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 053e7062fb4995..21b192a27cad9d 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -868,6 +868,10 @@ class LLVM_ABI MachineFunction {
   /// it are renumbered.
   void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr);
 
+  /// Return an estimate of the function's code size in bytes, including the
+  /// padding that basic-block alignment may introduce.
+  int64_t estimateFunctionSizeInBytes();
+
   /// print - Print out the MachineFunction in a format suitable for debugging
   /// to the specified stream.
   void print(raw_ostream &OS, const SlotIndexes* = nullptr) const;
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index b56888a0f71fe6..7eb1c5efb5cf71 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -378,6 +378,37 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
   MBBNumberingEpoch++;
 }
 
+int64_t MachineFunction::estimateFunctionSizeInBytes() {
+  // Walk the blocks in order, adding up instruction sizes plus the padding
+  // implied by each block's alignment.
+  const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+  const Align FnAlign = getAlignment();
+
+  // Estimated distance from the start of the function to the end of the most
+  // recently visited basic block.
+  int64_t EndOffset = 0;
+
+  for (MachineBasicBlock &MBB : *this) {
+    // Total size of this block's instructions as reported by the target.
+    int64_t BlockBytes = 0;
+    for (auto &MI : MBB)
+      BlockBytes += TII.getInstSizeInBytes(MI);
+
+    // Round the running offset up to this block's alignment.
+    const Align BBAlign = MBB.getAlignment();
+    int64_t BlockStart = alignTo(EndOffset, BBAlign);
+
+    // The alignment of this MBB is larger than the function's alignment, so
+    // we can't tell whether or not it will insert nops. Assume that it will.
+    if (BBAlign > FnAlign)
+      BlockStart += BBAlign.value() - FnAlign.value();
+
+    EndOffset = BlockStart + BlockBytes;
+  }
+
+  return EndOffset;
+}
+
 /// This method iterates over the basic blocks and assigns their IsBeginSection
 /// and IsEndSection fields. This must be called after MBB layout is finalized
 /// and the SectionID's are assigned to MBBs.
diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
index db86637ecf83f3..95dfafc13f3908 100644
--- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp
@@ -69,6 +69,9 @@ void XtensaAsmPrinter::emitMachineConstantPoolValue(
     const BlockAddress *BA =
         cast<XtensaConstantPoolConstant>(ACPV)->getBlockAddress();
     MCSym = GetBlockAddressSymbol(BA);
+  } else if (ACPV->isMachineBasicBlock()) {
+    const MachineBasicBlock *MBB = cast<XtensaConstantPoolMBB>(ACPV)->getMBB();
+    MCSym = MBB->getSymbol();
   } else if (ACPV->isJumpTable()) {
     unsigned Idx = cast<XtensaConstantPoolJumpTable>(ACPV)->getIndex();
     MCSym = this->GetJTISymbol(Idx, false);
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
index f46d386c9186aa..005ba10b813133 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
@@ -12,6 +12,7 @@
 
 #include "XtensaFrameLowering.h"
 #include "XtensaInstrInfo.h"
+#include "XtensaMachineFunctionInfo.h"
 #include "XtensaSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -260,14 +261,26 @@ void XtensaFrameLowering::processFunctionBeforeFrameFinalized(
   // Set scavenging frame index if necessary.
   MachineFrameInfo &MFI = MF.getFrameInfo();
   uint64_t MaxSPOffset = MFI.estimateStackSize(MF);
+  auto *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
+  unsigned ScavSlotsNum = 0;
 
-  if (isInt<12>(MaxSPOffset))
-    return;
+  if (!isInt<12>(MaxSPOffset))
+    ScavSlotsNum = 1;
+
+  // Far branches over 18-bit offset require a spill slot for scratch register.
+  bool IsLargeFunction = !isInt<18>(MF.estimateFunctionSizeInBytes());
+  if (IsLargeFunction)
+    ScavSlotsNum = std::max(ScavSlotsNum, 1u);
 
   const TargetRegisterClass &RC = Xtensa::ARRegClass;
   unsigned Size = TRI->getSpillSize(RC);
   Align Alignment = TRI->getSpillAlign(RC);
-  int FI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false);
+  for (unsigned I = 0; I < ScavSlotsNum; I++) {
+    int FI = MFI.CreateStackObject(Size, Alignment, false);
+    RS->addScavengingFrameIndex(FI);
 
-  RS->addScavengingFrameIndex(FI);
+    if (IsLargeFunction &&
+        XtensaFI->getBranchRelaxationScratchFrameIndex() == -1)
+      XtensaFI->setBranchRelaxationScratchFrameIndex(FI);
+  }
 }
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index b2b4376ca040b6..4c440da715fefe 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -13,11 +13,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "XtensaInstrInfo.h"
+#include "XtensaConstantPoolValue.h"
+#include "XtensaMachineFunctionInfo.h"
 #include "XtensaTargetMachine.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 
 #define GET_INSTRINFO_CTOR_DTOR
 #include "XtensaGenInstrInfo.inc"
@@ -186,6 +189,18 @@ void XtensaInstrInfo::loadImmediate(MachineBasicBlock &MBB,
   }
 }
 
+unsigned XtensaInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR: { // Inline asm: size depends on the string.
+    const char *AsmStr = MI.getOperand(0).getSymbolName();
+    return getInlineAsmLength(AsmStr, *MI.getMF()->getTarget().getMCAsmInfo());
+  }
+  default:
+    return MI.getDesc().getSize(); // Size from the instruction description.
+  }
+}
+
 bool XtensaInstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() <= 4 && "Invalid branch condition!");
@@ -244,6 +259,74 @@ bool XtensaInstrInfo::reverseBranchCondition(
   }
 }
 
+MachineBasicBlock *
+XtensaInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case Xtensa::BR_JT:
+  case Xtensa::JX:
+    return nullptr;
+  case Xtensa::J:
+    return MI.getOperand(0).getMBB();
+  // Branches whose target block is operand 2 (register and immediate forms).
+  case Xtensa::BEQ:
+  case Xtensa::BNE:
+  case Xtensa::BLT:
+  case Xtensa::BLTU:
+  case Xtensa::BGE:
+  case Xtensa::BGEU:
+  case Xtensa::BEQI:
+  case Xtensa::BNEI:
+  case Xtensa::BLTI:
+  case Xtensa::BLTUI:
+  case Xtensa::BGEI:
+  case Xtensa::BGEUI:
+    return MI.getOperand(2).getMBB();
+  // Branches whose target block is operand 1.
+  case Xtensa::BEQZ:
+  case Xtensa::BNEZ:
+  case Xtensa::BLTZ:
+  case Xtensa::BGEZ:
+    return MI.getOperand(1).getMBB();
+  default:
+    llvm_unreachable("Unknown branch opcode");
+  }
+}
+
+bool XtensaInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+                                            int64_t BrOffset) const {
+  // Every direct branch below tests BrOffset - 4 against a signed range.
+  switch (BranchOp) {
+  case Xtensa::JX:
+  case Xtensa::BR_JT:
+    return true;
+  // Signed 18-bit range.
+  case Xtensa::J:
+    return isIntN(18, BrOffset - 4);
+  // Signed 8-bit range.
+  case Xtensa::BEQ:
+  case Xtensa::BNE:
+  case Xtensa::BLT:
+  case Xtensa::BLTU:
+  case Xtensa::BGE:
+  case Xtensa::BGEU:
+  case Xtensa::BEQI:
+  case Xtensa::BNEI:
+  case Xtensa::BLTI:
+  case Xtensa::BLTUI:
+  case Xtensa::BGEI:
+  case Xtensa::BGEUI:
+    return isIntN(8, BrOffset - 4);
+  // Signed 12-bit range.
+  case Xtensa::BEQZ:
+  case Xtensa::BNEZ:
+  case Xtensa::BLTZ:
+  case Xtensa::BGEZ:
+    return isIntN(12, BrOffset - 4);
+  default:
+    llvm_unreachable("Unknown branch opcode");
+  }
+}
+
 bool XtensaInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
@@ -376,6 +459,130 @@ unsigned XtensaInstrInfo::insertBranch(
   return Count;
 }
 
+void XtensaInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                           MachineBasicBlock &DestBB,
+                                           MachineBasicBlock &RestoreBB,
+                                           const DebugLoc &DL, int64_t BrOffset,
+                                           RegScavenger *RS) const {
+  assert(RS && "RegScavenger required for long branching");
+  assert(MBB.empty() &&
+         "new block should be inserted for expanding unconditional branch");
+  assert(MBB.pred_size() == 1);
+
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineConstantPool *ConstantPool = MF->getConstantPool();
+  auto *XtensaFI = MF->getInfo<XtensaMachineFunctionInfo>();
+  MachineBasicBlock *JumpToMBB = &DestBB;
+
+  if (!isInt<32>(BrOffset))
+    report_fatal_error(
+        "Branch offsets outside of the signed 32-bit range not supported");
+
+  Register ScratchReg = MRI.createVirtualRegister(&Xtensa::ARRegClass);
+  auto II = MBB.end();
+
+  // Create the L32R without its last operand; that operand is added once
+  // JumpToMBB is known and has been placed in the constant pool.
+  MachineInstr &L32R = *BuildMI(MBB, II, DL, get(Xtensa::L32R), ScratchReg);
+  BuildMI(MBB, II, DL, get(Xtensa::JX)).addReg(ScratchReg, RegState::Kill);
+
+  RS->enterBasicBlockEnd(MBB);
+  Register ScavRegister =
+      RS->scavengeRegisterBackwards(Xtensa::ARRegClass, L32R.getIterator(),
+                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
+                                    /*AllowSpill=*/false);
+  if (ScavRegister != Xtensa::NoRegister)
+    RS->setRegUsed(ScavRegister);
+  else {
+    // No free register: use A12 (the choice is arbitrary), spilling it to the
+    // branch-relaxation slot here and reloading it in RestoreBB.
+    ScavRegister = Xtensa::A12;
+
+    int FrameIndex = XtensaFI->getBranchRelaxationScratchFrameIndex();
+    if (FrameIndex == -1)
+      report_fatal_error(
+          "Unable to properly handle scavenged register for indirect jump, "
+          "function code size is significantly larger than estimated");
+
+    storeRegToStackSlot(MBB, L32R, ScavRegister, /*IsKill=*/true, FrameIndex,
+                        &Xtensa::ARRegClass, &RI, Register());
+    RI.eliminateFrameIndex(std::prev(L32R.getIterator()),
+                           /*SpAdj=*/0, /*FIOperandNum=*/1);
+
+    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), ScavRegister, FrameIndex,
+                         &Xtensa::ARRegClass, &RI, Register());
+    RI.eliminateFrameIndex(RestoreBB.back(),
+                           /*SpAdj=*/0, /*FIOperandNum=*/1);
+    JumpToMBB = &RestoreBB;
+  }
+
+  XtensaConstantPoolValue *C = XtensaConstantPoolMBB::Create(
+      MF->getFunction().getContext(), JumpToMBB, 0);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
+  L32R.addOperand(MachineOperand::CreateCPI(Idx, 0));
+
+  MRI.replaceRegWith(ScratchReg, ScavRegister);
+  MRI.clearVirtRegs();
+}
+
+unsigned XtensaInstrInfo::insertConstBranchAtInst(
+    MachineBasicBlock &MBB, MachineInstr *I, int64_t offset,
+    ArrayRef<MachineOperand> Cond, DebugLoc DL, int *BytesAdded) const {
+  // BuildMI dereferences I, so a null insertion point is a caller bug.
+  assert(I && "insertConstBranchAtInst requires an insertion point");
+  assert(Cond.size() <= 4 &&
+         "Xtensa branch conditions have less than four components!");
+
+  if (Cond.empty() || (Cond[0].getImm() == Xtensa::J)) {
+    // Unconditional branch
+    MachineInstr *MI = BuildMI(MBB, I, DL, get(Xtensa::J)).addImm(offset);
+    if (BytesAdded && MI)
+      *BytesAdded += getInstSizeInBytes(*MI);
+    return 1;
+  }
+
+  // Conditional branch: Cond[0] holds the opcode, the rest its operands.
+  unsigned BR_C = Cond[0].getImm();
+  MachineInstr *MI = nullptr;
+  switch (BR_C) {
+  case Xtensa::BEQ:
+  case Xtensa::BNE:
+  case Xtensa::BLT:
+  case Xtensa::BLTU:
+  case Xtensa::BGE:
+  case Xtensa::BGEU:
+    MI = BuildMI(MBB, I, DL, get(BR_C))
+             .addImm(offset)
+             .addReg(Cond[1].getReg())
+             .addReg(Cond[2].getReg());
+    break;
+  case Xtensa::BEQI:
+  case Xtensa::BNEI:
+  case Xtensa::BLTI:
+  case Xtensa::BLTUI:
+  case Xtensa::BGEI:
+  case Xtensa::BGEUI:
+    MI = BuildMI(MBB, I, DL, get(BR_C))
+             .addImm(offset)
+             .addReg(Cond[1].getReg())
+             .addImm(Cond[2].getImm());
+    break;
+  case Xtensa::BEQZ:
+  case Xtensa::BNEZ:
+  case Xtensa::BLTZ:
+  case Xtensa::BGEZ:
+    MI = BuildMI(MBB, I, DL, get(BR_C)).addImm(offset).addReg(Cond[1].getReg());
+    break;
+  default:
+    llvm_unreachable("Invalid branch type!");
+  }
+  if (BytesAdded && MI)
+    *BytesAdded += getInstSizeInBytes(*MI);
+  // Exactly one instruction is inserted on every path.
+  return 1;
+}
+
 unsigned XtensaInstrInfo::insertBranchAtInst(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator I,
                                              MachineBasicBlock *TBB,
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
index 9f45cf7c29ada7..31da4d481d3097 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h
@@ -38,6 +38,8 @@ class XtensaInstrInfo : public XtensaGenInstrInfo {
   void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I) const;
 
+  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
   // Return the XtensaRegisterInfo, which this class owns.
   const XtensaRegisterInfo &getRegisterInfo() const { return RI; }
 
@@ -77,6 +79,11 @@ class XtensaInstrInfo : public XtensaGenInstrInfo {
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
 
+  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+  bool isBranchOffsetInRange(unsigned BranchOpc,
+                             int64_t BrOffset) const override;
+
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
@@ -90,12 +97,22 @@ class XtensaInstrInfo : public XtensaGenInstrInfo {
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
 
+  void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &DestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset = 0,
+                            RegScavenger *RS = nullptr) const override;
+
   unsigned insertBranchAtInst(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               MachineBasicBlock *TBB,
                               ArrayRef<MachineOperand> Cond, const DebugLoc &DL,
                               int *BytesAdded) const;
 
+  unsigned insertConstBranchAtInst(MachineBasicBlock &MBB, MachineInstr *I,
+                                   int64_t offset,
+                                   ArrayRef<MachineOperand> Cond, DebugLoc DL,
+                                   int *BytesAdded) const;
+
   // Return true if MI is a conditional or unconditional branch.
   // When returning true, set Cond to the mask of condition-code
   // values on which the instruction will branch, and set Target
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
new file mode 100644
index 00000000000000..c38c060b9387ff
--- /dev/null
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -0,0 +1,42 @@
+//==- XtensaMachineFunctionInfo.h - Xtensa machine function info --*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Xtensa-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class XtensaMachineFunctionInfo : public MachineFunctionInfo {
+  /// Frame index of BranchRelaxation's scratch spill slot, or -1 if unset.
+  int BranchRelaxationScratchFrameIndex = -1;
+
+public:
+  explicit XtensaMachineFunctionInfo(const Function &F,
+                                     const TargetSubtargetInfo *STI) {}
+
+  int getBranchRelaxationScratchFrameIndex() const {
+    return BranchRelaxationScratchFrameIndex;
+  }
+  void setBranchRelaxationScratchFrameIndex(int Index) {
+    BranchRelaxationScratchFrameIndex = Index;
+  }
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H */
diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h
index 8643ebb1c0f157..ede0eeb90b42de 100644
--- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.h
@@ -38,6 +38,10 @@ class XtensaRegisterInfo : public XtensaGenRegisterInfo {
     return true;
   }
 
+  bool trackLivenessAfterRegAlloc(const MachineFunction &) const override {
+    return true; // NOTE(review): presumably needed for post-RA scavenging.
+  }
+
   const uint16_t *
   getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
index 49c7faf84df1d3..8bbb2156e26904 100644
--- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
@@ -14,6 +14,7 @@
 
 #include "XtensaTargetMachine.h"
 #include "TargetInfo/XtensaTargetInfo.h"
+#include "XtensaMachineFunctionInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -83,6 +84,13 @@ XtensaTargetMachine::getSubtargetImpl(const Function &F) const {
   return I.get();
 }
 
+MachineFunctionInfo *XtensaTargetMachine::createMachineFunctionInfo(
+    BumpPtrAllocator &Allocator, const Function &F,
+    const TargetSubtargetInfo *STI) const {
+  return MachineFunctionInfo::create<XtensaMachineFunctionInfo>(Allocator, F,
+                                                                STI);
+}
+
 namespace {
 /// Xtensa Code Generator Pass Configuration Options.
 class XtensaPassConfig : public TargetPassConfig {
@@ -95,6 +103,7 @@ class XtensaPassConfig : public TargetPassConfig {
   }
 
   bool addInstSelector() override;
+  void addPreEmitPass() override;
 };
 } // end anonymous namespace
 
@@ -103,6 +112,8 @@ bool XtensaPassConfig::addInstSelector() {
   return false;
 }
 
+void XtensaPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+
 TargetPassConfig *XtensaTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new XtensaPassConfig(*this, PM);
 }
diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
index f371f22ed3d0e7..6975076b5d6997 100644
--- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
+++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
@@ -45,6 +45,10 @@ class XtensaTargetMachine : public LLVMTargetMachine {
     return TLOF.get();
   }
 
+  MachineFunctionInfo *
+  createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
+                            const TargetSubtargetInfo *STI) const override;
+
 protected:
   mutable StringMap<std::unique_ptr<XtensaSubtarget>> SubtargetMap;
 };
diff --git a/llvm/test/CodeGen/Xtensa/branch-relaxation.ll b/llvm/test/CodeGen/Xtensa/branch-relaxation.ll
new file mode 100644
index 00000000000000..15dbd855f71acb
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/branch-relaxation.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+define i32 @jump(i1 %a) {
+; XTENSA-LABEL: jump:
+; XTENSA:         movi a8, 1
+; XTENSA-NEXT:    and a8, a2, a8
+; XTENSA-NEXT:    beqz a8, .LBB0_2
+; XTENSA-NEXT:  # %bb.1: # %iftrue
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    .space 1024
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    movi a2, 1
+; XTENSA-NEXT:    ret
+; XTENSA-NEXT:  .LBB0_2: # %jmp
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    movi a2, 1
+; XTENSA-NEXT:    ret
+  br i1 %a, label %iftrue, label %jmp
+
+jmp:
+  call void asm sideeffect "", ""()
+  br label %tail
+
+iftrue:
+  call void asm sideeffect "", ""()
+  br label %space
+
+space:
+  call void asm sideeffect ".space 1024", ""()
+  br label %tail
+
+tail:
+  ret i32 1
+}
+
+define i32 @jx(i1 %a) {
+; XTENSA-LABEL: jx:
+; XTENSA:         addi a8, a1, -16
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    .cfi_def_cfa_offset 16
+; XTENSA-NEXT:    movi a8, 1
+; XTENSA-NEXT:    and a8, a2, a8
+; XTENSA-NEXT:    bnez a8, .LBB1_1
+; XTENSA-NEXT:  # %bb.4:
+; XTENSA-NEXT:    l32r a8, .LCPI1_0
+; XTENSA-NEXT:    jx a8
+; XTENSA-NEXT:  .LBB1_1: # %iftrue
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    .space 1048576
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    j .LBB1_3
+; XTENSA-NEXT:  .LBB1_2: # %jmp
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:  .LBB1_3: # %tail
+; XTENSA-NEXT:    movi a2, 1
+; XTENSA-NEXT:    addi a8, a1, 16
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    ret
+  br i1 %a, label %iftrue, label %jmp
+
+jmp:
+  call void asm sideeffect "", ""()
+  br label %tail
+
+iftrue:
+  call void asm sideeffect "", ""()
+  br label %space
+
+space:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %tail
+
+tail:
+  ret i32 1
+}
+
+define void @relax_spill() {
+; XTENSA-LABEL: relax_spill:
+; XTENSA:         addi a8, a1, -32
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    s32i a12, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a13, a1, 12 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a14, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 4 # 4-byte Folded Spill
+; XTENSA-NEXT:    .cfi_offset a12, -4
+; XTENSA-NEXT:    .cfi_offset a13, -8
+; XTENSA-NEXT:    .cfi_offset a14, -12
+; XTENSA-NEXT:    .cfi_offset a15, -16
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a2, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a3, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a4, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a5, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a6, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a7, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a8, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a9, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a10, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a11, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a12, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a13, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a14, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    addi a15, a3, 1
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    beq a5, a6, .LBB2_1
+; XTENSA-NEXT:  # %bb.3:
+; XTENSA-NEXT:    s32i a12, a1, 0
+; XTENSA-NEXT:    l32r a12, .LCPI2_0
+; XTENSA-NEXT:    jx a12
+; XTENSA-NEXT:  .LBB2_1: # %iftrue
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    .space 536870912
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    j .LBB2_2
+; XTENSA-NEXT:  .LBB2_4: # %iffalse
+; XTENSA-NEXT:    l32i a12, a1, 0
+; XTENSA-NEXT:  .LBB2_2: # %iffalse
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a2
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a3
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a4
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a5
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a6
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a7
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a8
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a9
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a10
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a11
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a12
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a13
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a14
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    #APP
+; XTENSA-NEXT:    # reg use a15
+; XTENSA-NEXT:    #NO_APP
+; XTENSA-NEXT:    l32i a15, a1, 4 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a14, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a13, a1, 12 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a12, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    addi a8, a1, 32
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    ret
+  call void asm sideeffect "", ""()
+  %a2 = call i32 asm sideeffect "addi a2, a3, 1", "={a2}"()
+  %a3 = call i32 asm sideeffect "addi a3, a3, 1", "={a3}"()
+  %a4 = call i32 asm sideeffect "addi a4, a3, 1", "={a4}"()
+  %a5 = call i32 asm sideeffect "addi a5, a3, 1", "={a5}"()
+  %a6 = call i32 asm sideeffect "addi a6, a3, 1", "={a6}"()
+  %a7 = call i32 asm sideeffect "addi a7, a3, 1", "={a7}"()
+  %a8 = call i32 asm sideeffect "addi a8, a3, 1", "={a8}"()
+  %a9 = call i32 asm sideeffect "addi a9, a3, 1", "={a9}"()
+  %a10 = call i32 asm sideeffect "addi a10, a3, 1", "={a10}"()
+  %a11 = call i32 asm sideeffect "addi a11, a3, 1", "={a11}"()
+  %a12 = call i32 asm sideeffect "addi a12, a3, 1", "={a12}"()
+  %a13 = call i32 asm sideeffect "addi a13, a3, 1", "={a13}"()
+  %a14 = call i32 asm sideeffect "addi a14, a3, 1", "={a14}"()
+  %a15 = call i32 asm sideeffect "addi a15, a3, 1", "={a15}"()
+
+  %cmp = icmp eq i32 %a5, %a6
+  br i1 %cmp, label %iftrue, label %iffalse
+
+iftrue:
+  call void asm sideeffect ".space 536870912", ""()
+  br label %iffalse
+
+iffalse:
+  call void asm sideeffect "", ""()
+  call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4)
+  call void asm sideeffect "# reg use $0", "{a5}"(i32 %a5)
+  call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6)
+  call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7)
+  call void asm sideeffect "# reg use $0", "{a8}"(i32 %a8)
+  call void asm sideeffect "# reg use $0", "{a9}"(i32 %a9)
+  call void asm sideeffect "# reg use $0", "{a10}"(i32 %a10)
+  call void asm sideeffect "# reg use $0", "{a11}"(i32 %a11)
+  call void asm sideeffect "# reg use $0", "{a12}"(i32 %a12)
+  call void asm sideeffect "# reg use $0", "{a13}"(i32 %a13)
+  call void asm sideeffect "# reg use $0", "{a14}"(i32 %a14)
+  call void asm sideeffect "# reg use $0", "{a15}"(i32 %a15)
+  ret void
+}
diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll
index bad57d58b28a67..60303235386256 100644
--- a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll
@@ -8,8 +8,8 @@ declare i32 @llvm.ctpop.i32(i32)
 
 define i32 @test_cttz_i32(i32 %a) nounwind {
 ; XTENSA-LABEL: test_cttz_i32:
-; XTENSA:         beqz a2, .LBB0_1
-; XTENSA-NEXT:  # %bb.2: # %cond.false
+; XTENSA:         beqz a2, .LBB0_2
+; XTENSA-NEXT:  # %bb.1: # %cond.false
 ; XTENSA-NEXT:    movi a8, -1
 ; XTENSA-NEXT:    xor a8, a2, a8
 ; XTENSA-NEXT:    addi a9, a2, -1
@@ -33,7 +33,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; XTENSA-NEXT:    add a8, a8, a9
 ; XTENSA-NEXT:    extui a2, a8, 24, 8
 ; XTENSA-NEXT:    ret
-; XTENSA-NEXT:  .LBB0_1:
+; XTENSA-NEXT:  .LBB0_2:
 ; XTENSA-NEXT:    movi a2, 32
 ; XTENSA-NEXT:    ret
   %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
@@ -71,8 +71,8 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
 
 define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; XTENSA-LABEL: test_ctlz_i32:
-; XTENSA:         beqz a2, .LBB2_1
-; XTENSA-NEXT:  # %bb.2: # %cond.false
+; XTENSA:         beqz a2, .LBB2_2
+; XTENSA-NEXT:  # %bb.1: # %cond.false
 ; XTENSA-NEXT:    srli a8, a2, 1
 ; XTENSA-NEXT:    or a8, a2, a8
 ; XTENSA-NEXT:    srli a9, a8, 2
@@ -104,7 +104,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; XTENSA-NEXT:    add a8, a8, a9
 ; XTENSA-NEXT:    extui a2, a8, 24, 8
 ; XTENSA-NEXT:    ret
-; XTENSA-NEXT:  .LBB2_1:
+; XTENSA-NEXT:  .LBB2_2:
 ; XTENSA-NEXT:    movi a2, 32
 ; XTENSA-NEXT:    ret
   %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)



More information about the llvm-commits mailing list