[lld] [llvm] [Hexagon][llvm-objdump] Improve disassembly of Hexagon bundles (PR #145807)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 26 11:37:36 PDT 2025


https://github.com/quic-areg updated https://github.com/llvm/llvm-project/pull/145807

>From cf303065f845fde0247d47a9dac53a5eb7001118 Mon Sep 17 00:00:00 2001
From: quic-areg <aregmi at quicinc.com>
Date: Thu, 19 Jun 2025 11:00:52 -0700
Subject: [PATCH 1/2] [Hexagon][llvm-objdump] Improve disassembly of Hexagon
 bundles

Hexagon instructions are VLIW "bundles" of up to four instruction words
encoded as a single MCInst with operands for each sub-instruction.
Previously, the disassembler's getInstruction() returned the full bundle,
which made it difficult to work with llvm-objdump.

For example, since all instructions are bundles, and bundles do not branch,
branch targets could not be printed.

This patch modifies the Hexagon disassembler to return individual
sub-instructions instead of entire bundles, enabling correct printing
of branch targets and relocations. It also introduces
`MCDisassembler::getInstructionBundle` for cases where the full bundle is
still needed.

By default, llvm-objdump separates instructions with newlines. However,
this does not work well for Hexagon syntax:

  { inst1
    inst2
    inst3
    inst4 <branch> } :endloop0

Instructions may be followed by a closing brace, a closing brace with
`:endloop`, or a newline. Branches must appear within the braces.

To address this, `PrettyPrinter::getInstructionSeparator()` is added and
overridden for Hexagon.
---
 lld/test/ELF/hexagon-plt.s                    |  18 +-
 lld/test/ELF/hexagon-shared.s                 |   2 +-
 lld/test/ELF/hexagon-tls-gd-xform.s           |   4 +-
 .../llvm/MC/MCDisassembler/MCDisassembler.h   |  12 ++
 .../Disassembler/HexagonDisassembler.cpp      | 109 ++++++++---
 .../MCTargetDesc/HexagonInstPrinter.cpp       |  35 ++--
 .../MCTargetDesc/HexagonMCTargetDesc.cpp      |  19 +-
 llvm/test/MC/Hexagon/two_ext.s                |   4 +-
 .../ELF/Hexagon/branch-targets.yaml           |  26 +++
 llvm/tools/llvm-mc/Disassembler.cpp           |   6 +-
 llvm/tools/llvm-objdump/llvm-objdump.cpp      | 179 +++++++++++-------
 11 files changed, 275 insertions(+), 139 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objdump/ELF/Hexagon/branch-targets.yaml

diff --git a/lld/test/ELF/hexagon-plt.s b/lld/test/ELF/hexagon-plt.s
index 679de82923a72..780dc434a6698 100644
--- a/lld/test/ELF/hexagon-plt.s
+++ b/lld/test/ELF/hexagon-plt.s
@@ -30,31 +30,31 @@
 # DIS:      <_start>:
 ## Direct call
 ## Call foo directly
-# DIS-NEXT:   { call 0x2003c }
+# DIS-NEXT:   { call 0x2003c <foo> }
 ## Call bar via plt
-# DIS-NEXT:   { call 0x20060 }
+# DIS-NEXT:   { call 0x20060 <bar at plt> }
 ## Call weak via plt
-# DIS-NEXT:   { call 0x20070 }
+# DIS-NEXT:   { call 0x20070 <weak at plt> }
 # DIS-NEXT: { 	immext(#0)
 
 ## Call foo directly
-# DIS-NEXT: if (p0) jump:nt 0x2003c }
+# DIS-NEXT: if (p0) jump:nt 0x2003c <foo> }
 # DIS-NEXT: { 	immext(#64)
 ## Call bar via plt
-# DIS-NEXT: if (p0) jump:nt 0x20060 }
+# DIS-NEXT: if (p0) jump:nt 0x20060 <bar at plt> }
 # DIS-NEXT: { 	immext(#64)
 ## Call weak via plt
-# DIS-NEXT: if (p0) jump:nt 0x20070 }
+# DIS-NEXT: if (p0) jump:nt 0x20070 <weak at plt> }
 # DIS-NEXT: { 	immext(#0)
 
 ## Call foo directly
-# DIS-NEXT: r0 = #0 ; jump 0x2003c }
+# DIS-NEXT: r0 = #0 ; jump 0x2003c <foo> }
 # DIS-NEXT: { 	immext(#0)
 ## Call bar via plt
-# DIS-NEXT: r0 = #0 ; jump 0x20060 }
+# DIS-NEXT: r0 = #0 ; jump 0x20060 <bar at plt> }
 # DIS-NEXT: { 	immext(#0)
 ## Call weak via plt
-# DIS-NEXT: r0 = #0 ; jump 0x20070 }
+# DIS-NEXT: r0 = #0 ; jump 0x20070 <weak at plt> }
 
 # DIS:      <foo>:
 # DIS-NEXT:   2003c:
diff --git a/lld/test/ELF/hexagon-shared.s b/lld/test/ELF/hexagon-shared.s
index cc62662d278e2..7f7390f1fa8d8 100644
--- a/lld/test/ELF/hexagon-shared.s
+++ b/lld/test/ELF/hexagon-shared.s
@@ -88,7 +88,7 @@ pvar:
 # PLT-NEXT: jumpr r28 }
 
 # TEXT:  bc 00 01 00 000100bc
-# TEXT: { 	call 0x10300 }
+# TEXT: { 	call 0x10300 <bar at plt> }
 # TEXT: if (p0) jump:nt 0x10300
 # TEXT: r0 = #0 ; jump 0x10300
 # TEXT: r0 = add(r1,##-65548)
diff --git a/lld/test/ELF/hexagon-tls-gd-xform.s b/lld/test/ELF/hexagon-tls-gd-xform.s
index 65aeb118fcb33..ade54e8a16fad 100644
--- a/lld/test/ELF/hexagon-tls-gd-xform.s
+++ b/lld/test/ELF/hexagon-tls-gd-xform.s
@@ -18,10 +18,10 @@
 _start:
 .ifdef GDPLT
                         call x at gdplt
-# CHECK_GDPLT:  101ec: { call 0x10220 }
+# CHECK_GDPLT:  101ec: { call 0x10220 <__tls_get_addr at plt> }
 .else
                   call x
-# CHECK:  101b8: { call 0x101e0 }
+# CHECK:  101b8: { call 0x101e0 <x at plt> }
 .endif
 
 # CHECK_GDPLT:        10220: { immext(#0x20040)
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 3a7ca1a69ab85..cae2fbcac1fef 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -136,6 +136,18 @@ class LLVM_ABI MCDisassembler {
                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
                                       raw_ostream &CStream) const = 0;
 
+  /// Returns the disassembly of an instruction bundle for VLIW architectures
+  /// like Hexagon.
+  ///
+  /// \param Instr    - An MCInst to populate with the contents of
+  /// the Bundle with sub-instructions encoded as Inst operands.
+  virtual DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
+                                            ArrayRef<uint8_t> Bytes,
+                                            uint64_t Address,
+                                            raw_ostream &CStream) const {
+    return Fail;
+  }
+
   /// Used to perform separate target specific disassembly for a particular
   /// symbol. May parse any prelude that precedes instructions after the
   /// start of a symbol, or the entire symbol.
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5bd31707acb6f..22cff7c80fa01 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -43,12 +43,12 @@ namespace {
 class HexagonDisassembler : public MCDisassembler {
 public:
   std::unique_ptr<MCInstrInfo const> const MCII;
-  std::unique_ptr<MCInst *> CurrentBundle;
+  mutable std::unique_ptr<MCInst> CurrentBundle;
   mutable MCInst const *CurrentExtender;
 
   HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
                       MCInstrInfo const *MCII)
-      : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *),
+      : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(nullptr),
         CurrentExtender(nullptr) {}
 
   DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
@@ -57,7 +57,23 @@ class HexagonDisassembler : public MCDisassembler {
   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const override;
+
+  DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
+                                    ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                    raw_ostream &CStream) const override;
+
   void remapInstruction(MCInst &Instr) const;
+
+private:
+  bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
+                  uint64_t &BytesToSkip, raw_ostream &CS) const;
+
+  void resetBundle() const {
+    CurrentBundle.reset();
+    CurrentInstruction = nullptr;
+  }
+
+  mutable MCOperand *CurrentInstruction = nullptr;
 };
 
 static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI,
@@ -171,43 +187,88 @@ LLVMInitializeHexagonDisassembler() {
                                          createHexagonDisassembler);
 }
 
-DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                                 ArrayRef<uint8_t> Bytes,
-                                                 uint64_t Address,
-                                                 raw_ostream &CS) const {
-  CommentStream = &CS;
-
-  DecodeStatus Result = DecodeStatus::Success;
+bool HexagonDisassembler::makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                     uint64_t &BytesToSkip,
+                                     raw_ostream &CS) const {
   bool Complete = false;
-  Size = 0;
+  DecodeStatus Result = DecodeStatus::Success;
 
-  *CurrentBundle = &MI;
-  MI.setOpcode(Hexagon::BUNDLE);
-  MI.addOperand(MCOperand::createImm(0));
+  CurrentBundle.reset(new MCInst);
+  CurrentBundle->setOpcode(Hexagon::BUNDLE);
+  CurrentBundle->addOperand(MCOperand::createImm(0));
   while (Result == Success && !Complete) {
     if (Bytes.size() < HEXAGON_INSTR_SIZE)
-      return MCDisassembler::Fail;
+      return false;
     MCInst *Inst = getContext().createMCInst();
-    Result = getSingleInstruction(*Inst, MI, Bytes, Address, CS, Complete);
-    MI.addOperand(MCOperand::createInst(Inst));
-    Size += HEXAGON_INSTR_SIZE;
+    Result = getSingleInstruction(*Inst, *CurrentBundle, Bytes, Address, CS,
+                                  Complete);
+    CurrentBundle->addOperand(MCOperand::createInst(Inst));
+    BytesToSkip += HEXAGON_INSTR_SIZE;
     Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
   }
   if (Result == MCDisassembler::Fail)
-    return Result;
-  if (Size > HEXAGON_MAX_PACKET_SIZE)
-    return MCDisassembler::Fail;
+    return false;
+  if (BytesToSkip > HEXAGON_MAX_PACKET_SIZE)
+    return false;
 
   const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI);
   const auto STI_ = (ArchSTI != nullptr) ? *ArchSTI : STI;
-  HexagonMCChecker Checker(getContext(), *MCII, STI_, MI,
+  HexagonMCChecker Checker(getContext(), *MCII, STI_, *CurrentBundle,
                            *getContext().getRegisterInfo(), false);
   if (!Checker.check())
-    return MCDisassembler::Fail;
-  remapInstruction(MI);
+    return false;
+  remapInstruction(*CurrentBundle);
+  return true;
+}
+
+DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+                                                 ArrayRef<uint8_t> Bytes,
+                                                 uint64_t Address,
+                                                 raw_ostream &CS) const {
+  CommentStream = &CS;
+
+  Size = 0;
+  uint64_t BytesToSkip = 0;
+
+  if (!CurrentBundle) {
+    if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
+      Size = BytesToSkip;
+      resetBundle();
+      return MCDisassembler::Fail;
+    }
+    CurrentInstruction = (CurrentBundle->begin() + 1);
+  }
+
+  MI = *(CurrentInstruction->getInst());
+  Size = HEXAGON_INSTR_SIZE;
+  if (++CurrentInstruction == CurrentBundle->end())
+    resetBundle();
   return MCDisassembler::Success;
 }
 
+DecodeStatus HexagonDisassembler::getInstructionBundle(MCInst &MI,
+                                                       uint64_t &Size,
+                                                       ArrayRef<uint8_t> Bytes,
+                                                       uint64_t Address,
+                                                       raw_ostream &CS) const {
+  CommentStream = &CS;
+  Size = 0;
+  uint64_t BytesToSkip = 0;
+  assert(!CurrentBundle);
+
+  if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
+    Size = BytesToSkip;
+    resetBundle();
+    return MCDisassembler::Fail;
+  }
+
+  MI = *CurrentBundle;
+  Size = HEXAGON_INSTR_SIZE * HexagonMCInstrInfo::bundleSize(MI);
+  resetBundle();
+
+  return Success;
+}
+
 void HexagonDisassembler::remapInstruction(MCInst &Instr) const {
   for (auto I: HexagonMCInstrInfo::bundleInstructions(Instr)) {
     auto &MI = const_cast<MCInst &>(*I.getInst());
@@ -482,7 +543,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
     unsigned Offset = 1;
     bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
     bool PrevVector = false;
-    auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+    auto Instructions = HexagonMCInstrInfo::bundleInstructions(*CurrentBundle);
     auto i = Instructions.end() - 1;
     for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
       if (i == n)
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 9030e43b7149f..50b66772212a4 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -33,30 +33,17 @@ void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
 void HexagonInstPrinter::printInst(const MCInst *MI, uint64_t Address,
                                    StringRef Annot, const MCSubtargetInfo &STI,
                                    raw_ostream &OS) {
-  assert(HexagonMCInstrInfo::isBundle(*MI));
-  assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
-  assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
-  HasExtender = false;
-  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
-    MCInst const &MCI = *I.getInst();
-    if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
-      printInstruction(MCI.getOperand(1).getInst(), Address, OS);
-      OS << '\v';
-      HasExtender = false;
-      printInstruction(MCI.getOperand(0).getInst(), Address, OS);
-    } else
-      printInstruction(&MCI, Address, OS);
-    HasExtender = HexagonMCInstrInfo::isImmext(MCI);
-    OS << "\n";
-  }
-
-  bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(*MI);
-  bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(*MI);
-  if (IsLoop0) {
-    OS << (IsLoop1 ? " :endloop01" : " :endloop0");
-  } else if (IsLoop1) {
-    OS << " :endloop1";
-  }
+  if (HexagonMCInstrInfo::isDuplex(MII, *MI)) {
+    printInstruction(MI->getOperand(1).getInst(), Address, OS);
+    OS << '\v';
+    HasExtender = false;
+    printInstruction(MI->getOperand(0).getInst(), Address, OS);
+  } else
+    printInstruction(MI, Address, OS);
+  HasExtender = HexagonMCInstrInfo::isImmext(*MI);
+  if ((MI->getOpcode() & HexagonII::INST_PARSE_MASK) ==
+      HexagonII::INST_PARSE_PACKET_END)
+    HasExtender = false;
 }
 
 void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 980df819b2c26..bfea50e2d6dc0 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -252,8 +252,21 @@ class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
     std::string Buffer;
     {
       raw_string_ostream TempStream(Buffer);
-      InstPrinter.printInst(&Inst, Address, "", STI, TempStream);
+      for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+        InstPrinter.printInst(I.getInst(), Address, "", STI, TempStream);
+        TempStream << "\n";
+      }
+    }
+
+    std::string LoopString = "";
+    bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(Inst);
+    bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(Inst);
+    if (IsLoop0) {
+      LoopString += (IsLoop1 ? " :endloop01" : " :endloop0");
+    } else if (IsLoop1) {
+      LoopString += " :endloop1";
     }
+
     StringRef Contents(Buffer);
     auto PacketBundle = Contents.rsplit('\n');
     auto HeadTail = PacketBundle.first.split('\n');
@@ -275,9 +288,9 @@ class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
     }
 
     if (HexagonMCInstrInfo::isMemReorderDisabled(Inst))
-      OS << "\n\t} :mem_noshuf" << PacketBundle.second;
+      OS << "\n\t} :mem_noshuf" << LoopString;
     else
-      OS << "\t}" << PacketBundle.second;
+      OS << "\t}" << LoopString;
   }
 
   void finish() override { finishAttributeSection(); }
diff --git a/llvm/test/MC/Hexagon/two_ext.s b/llvm/test/MC/Hexagon/two_ext.s
index 28b2aa3f1ecae..09b51c5f029a7 100644
--- a/llvm/test/MC/Hexagon/two_ext.s
+++ b/llvm/test/MC/Hexagon/two_ext.s
@@ -6,7 +6,7 @@
   if (!p1) call foo_b
 }
 # CHECK: 00004000 { immext(#0)
-# CHECK: 5d004100   if (p1) call 0x0
+# CHECK: 5d004100   if (p1) call 0x0 <.text>
 # CHECK: 00004000   immext(#0)
-# CHECK: 5d20c100   if (!p1) call 0x0 }
+# CHECK: 5d20c100   if (!p1) call 0x0 <.text> }
 
diff --git a/llvm/test/tools/llvm-objdump/ELF/Hexagon/branch-targets.yaml b/llvm/test/tools/llvm-objdump/ELF/Hexagon/branch-targets.yaml
new file mode 100644
index 0000000000000..61ac32c658f6c
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/Hexagon/branch-targets.yaml
@@ -0,0 +1,26 @@
+## Check that branch targets are printed within instruction packets for Hexagon
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS32
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_HEXAGON
+  Flags:           [ EF_HEXAGON_MACH_V68, EF_HEXAGON_ISA_V68 ]
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign:    0x10
+    Content:         00C09DA000C000781EC01E9600C09DA000C0005A1EC01E96
+...
+
+# RUN: yaml2obj %s | llvm-objdump -d - | FileCheck %s
+
+#      CHECK: 00000000 <.text>:
+# CHECK-NEXT:        0:       00 c0 9d a0     a09dc000 {      allocframe(#0x0) }
+# CHECK-NEXT:        4:       00 c0 00 78     7800c000 {      r0 = #0x0 }
+# CHECK-NEXT:        8:       1e c0 1e 96     961ec01e {      dealloc_return }
+# CHECK-NEXT:        c:       00 c0 9d a0     a09dc000 {      allocframe(#0x0) }
+# CHECK-NEXT:       10:       00 c0 00 5a     5a00c000 {      call 0x10 <.text+0x10> }
+# CHECK-NEXT:       14:       1e c0 1e 96     961ec01e {      dealloc_return }
\ No newline at end of file
diff --git a/llvm/tools/llvm-mc/Disassembler.cpp b/llvm/tools/llvm-mc/Disassembler.cpp
index 16897054fbea8..a4570ff932081 100644
--- a/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/llvm/tools/llvm-mc/Disassembler.cpp
@@ -45,7 +45,11 @@ static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes,
     MCInst Inst;
 
     MCDisassembler::DecodeStatus S;
-    S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
+    if (STI.getTargetTriple().getArch() == Triple::hexagon)
+      S = DisAsm.getInstructionBundle(Inst, Size, Data.slice(Index), Index,
+                                      nulls());
+    else
+      S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
     switch (S) {
     case MCDisassembler::Fail:
       SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index c5967cd090eec..612a7a92ea378 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -693,6 +693,32 @@ class PrettyPrinter {
     } else
       OS << "\t<unknown>";
   }
+
+  virtual std::string getInstructionSeparator() const { return "\n"; }
+
+  virtual void emitPostInstructionInfo(formatted_raw_ostream &FOS,
+                                       const MCAsmInfo &MAI,
+                                       const MCSubtargetInfo &STI,
+                                       StringRef Comments,
+                                       LiveVariablePrinter &LVP) {
+    do {
+      if (!Comments.empty()) {
+        // Emit a line of comments.
+        StringRef Comment;
+        std::tie(Comment, Comments) = Comments.split('\n');
+        // MAI.getCommentColumn() assumes that instructions are printed at the
+        // position of 8, while getInstStartColumn() returns the actual
+        // position.
+        unsigned CommentColumn =
+            MAI.getCommentColumn() - 8 + getInstStartColumn(STI);
+        FOS.PadToColumn(CommentColumn);
+        FOS << MAI.getCommentString() << ' ' << Comment;
+      }
+      LVP.printAfterInst(FOS);
+      FOS << getInstructionSeparator();
+    } while (!Comments.empty());
+    FOS.flush();
+  }
 };
 PrettyPrinter PrettyPrinterInst;
 
@@ -714,6 +740,32 @@ class HexagonPrettyPrinter : public PrettyPrinter {
       }
     }
   }
+
+  std::string getInstructionSeparator() const override {
+    SmallString<40> Separator;
+    raw_svector_ostream OS(Separator);
+    if (ShouldClosePacket) {
+      OS << " }";
+      if (IsLoop0 || IsLoop1)
+        OS << "  ";
+      if (IsLoop0)
+        OS << (IsLoop1 ? ":endloop01" : ":endloop0");
+      else if (IsLoop1)
+        OS << ":endloop1";
+    }
+    OS << '\n';
+    return OS.str().str();
+  }
+
+  void emitPostInstructionInfo(formatted_raw_ostream &FOS, const MCAsmInfo &MAI,
+                               const MCSubtargetInfo &STI, StringRef Comments,
+                               LiveVariablePrinter &LVP) override {
+
+    PrettyPrinter::emitPostInstructionInfo(FOS, MAI, STI, Comments, LVP);
+    if (ShouldClosePacket)
+      reset();
+  }
+
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
                  object::SectionedAddress Address, formatted_raw_ostream &OS,
                  StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
@@ -724,60 +776,64 @@ class HexagonPrettyPrinter : public PrettyPrinter {
     if (!MI) {
       printLead(Bytes, Address.Address, OS);
       OS << " <unknown>";
+      reset();
       return;
     }
-    std::string Buffer;
+
+    StringRef Preamble = IsStartOfBundle ? " { " : "   ";
+
+    if (SP && (PrintSource || PrintLines))
+      SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
+    printLead(Bytes, Address.Address, OS);
+    OS << Preamble;
+    std::string Buf;
     {
-      raw_string_ostream TempStream(Buffer);
+      raw_string_ostream TempStream(Buf);
       IP.printInst(MI, Address.Address, "", STI, TempStream);
     }
-    StringRef Contents(Buffer);
-    // Split off bundle attributes
-    auto PacketBundle = Contents.rsplit('\n');
-    // Split off first instruction from the rest
-    auto HeadTail = PacketBundle.first.split('\n');
-    auto Preamble = " { ";
-    auto Separator = "";
-
-    // Hexagon's packets require relocations to be inline rather than
-    // clustered at the end of the packet.
-    std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
-    std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
-    auto PrintReloc = [&]() -> void {
-      while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
-        if (RelCur->getOffset() == Address.Address) {
-          printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false);
-          return;
-        }
-        ++RelCur;
-      }
-    };
+    StringRef Contents(Buf);
+
+    auto Duplex = Contents.split('\v');
+    bool HasDuplex = !Duplex.second.empty();
+    if (HasDuplex) {
+      OS << Duplex.first;
+      OS << "; ";
+      OS << Duplex.second;
+    } else {
+      OS << Duplex.first;
+    }
 
-    while (!HeadTail.first.empty()) {
-      OS << Separator;
-      Separator = "\n";
-      if (SP && (PrintSource || PrintLines))
-        SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
-      printLead(Bytes, Address.Address, OS);
-      OS << Preamble;
-      Preamble = "   ";
-      StringRef Inst;
-      auto Duplex = HeadTail.first.split('\v');
-      if (!Duplex.second.empty()) {
-        OS << Duplex.first;
-        OS << "; ";
-        Inst = Duplex.second;
-      }
+    uint32_t Instruction = support::endian::read32le(Bytes.data());
+
+    uint32_t ParseMask = 0x0000c000;
+    uint32_t PacketEndMask = 0x0000c000;
+    uint32_t LoopEndMask = 0x00008000;
+    uint32_t ParseBits = Instruction & ParseMask;
+
+    if (ParseBits == LoopEndMask) {
+      if (IsStartOfBundle)
+        IsLoop0 = true;
       else
-        Inst = HeadTail.first;
-      OS << Inst;
-      HeadTail = HeadTail.second.split('\n');
-      if (HeadTail.first.empty())
-        OS << " } " << PacketBundle.second;
-      PrintReloc();
-      Bytes = Bytes.slice(4);
-      Address.Address += 4;
+        IsLoop1 = true;
     }
+
+    IsStartOfBundle = false;
+
+    if (ParseBits == PacketEndMask || HasDuplex)
+      ShouldClosePacket = true;
+  }
+
+private:
+  bool IsStartOfBundle = true;
+  bool IsLoop0 = false;
+  bool IsLoop1 = false;
+  bool ShouldClosePacket = false;
+
+  void reset() {
+    IsStartOfBundle = true;
+    IsLoop0 = false;
+    IsLoop1 = false;
+    ShouldClosePacket = false;
   }
 };
 HexagonPrettyPrinter HexagonPrettyPrinterInst;
@@ -1610,29 +1666,6 @@ static StringRef getSegmentName(const MachOObjectFile *MachO,
   return "";
 }
 
-static void emitPostInstructionInfo(formatted_raw_ostream &FOS,
-                                    const MCAsmInfo &MAI,
-                                    const MCSubtargetInfo &STI,
-                                    StringRef Comments,
-                                    LiveVariablePrinter &LVP) {
-  do {
-    if (!Comments.empty()) {
-      // Emit a line of comments.
-      StringRef Comment;
-      std::tie(Comment, Comments) = Comments.split('\n');
-      // MAI.getCommentColumn() assumes that instructions are printed at the
-      // position of 8, while getInstStartColumn() returns the actual position.
-      unsigned CommentColumn =
-          MAI.getCommentColumn() - 8 + getInstStartColumn(STI);
-      FOS.PadToColumn(CommentColumn);
-      FOS << MAI.getCommentString() << ' ' << Comment;
-    }
-    LVP.printAfterInst(FOS);
-    FOS << '\n';
-  } while (!Comments.empty());
-  FOS.flush();
-}
-
 static void createFakeELFSections(ObjectFile &Obj) {
   assert(Obj.isELF());
   if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj))
@@ -2526,15 +2559,15 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
         }
 
         assert(DT->Context->getAsmInfo());
-        emitPostInstructionInfo(FOS, *DT->Context->getAsmInfo(),
-                                *DT->SubtargetInfo, CommentStream.str(), LVP);
+        DT->Printer->emitPostInstructionInfo(FOS, *DT->Context->getAsmInfo(),
+                                             *DT->SubtargetInfo,
+                                             CommentStream.str(), LVP);
         Comments.clear();
 
         if (BTF)
           printBTFRelocation(FOS, *BTF, {Index, Section.getIndex()}, LVP);
 
-        // Hexagon handles relocs in pretty printer
-        if (InlineRelocs && Obj.getArch() != Triple::hexagon) {
+        if (InlineRelocs) {
           while (findRel()) {
             // When --adjust-vma is used, update the address printed.
             printRelocation(FOS, Obj.getFileName(), *RelCur,

>From 5bd717c83ed66c8650ade70c56522cc98d78aa13 Mon Sep 17 00:00:00 2001
From: quic-areg <aregmi at quicinc.com>
Date: Thu, 26 Jun 2025 11:36:57 -0700
Subject: [PATCH 2/2] fix braces style

---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 50b66772212a4..f83e06cd3d930 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -38,8 +38,9 @@ void HexagonInstPrinter::printInst(const MCInst *MI, uint64_t Address,
     OS << '\v';
     HasExtender = false;
     printInstruction(MI->getOperand(0).getInst(), Address, OS);
-  } else
+  } else {
     printInstruction(MI, Address, OS);
+  }
   HasExtender = HexagonMCInstrInfo::isImmext(*MI);
   if ((MI->getOpcode() & HexagonII::INST_PARSE_MASK) ==
       HexagonII::INST_PARSE_PACKET_END)



More information about the llvm-commits mailing list