[clang] 578ca5e - [clang][bytecode] Print jump lines in Function::dump() (#135482)

via cfe-commits cfe-commits at lists.llvm.org
Sun Apr 13 06:46:05 PDT 2025


Author: Timm Baeder
Date: 2025-04-13T15:46:01+02:00
New Revision: 578ca5e469ef1c91a79aa15bc186921ee7faa855

URL: https://github.com/llvm/llvm-project/commit/578ca5e469ef1c91a79aa15bc186921ee7faa855
DIFF: https://github.com/llvm/llvm-project/commit/578ca5e469ef1c91a79aa15bc186921ee7faa855.diff

LOG: [clang][bytecode] Print jump lines in Function::dump() (#135482)

For example, given this function:
```c++
constexpr int foo(int b) {
  int a = 1+1;

  for (int i = 0; i < b; ++i) {
    ++a;
  }
  return a;
}
```

`Function::dump()` now prints:
```
foo 0x7cc8d4bf0580
frame size: 128
arg size:   8
rvo:        0
this arg:   0
0      InitScope         0
16     ConstSint32       1
32     ConstSint32       1
48     AddSint32
56     SetLocalSint32    40
72     ConstSint32       0
88     SetLocalSint32    104
104    GetPtrLocal       104         <-+
120    LoadPopSint32                   |
128    GetPtrParam       0             |
144    LoadPopSint32                   |
152    LTSint32                        |
160    Jf                80     --+    |
176    GetPtrLocal       40       |    |
192    IncPopSint32      1        |    |
208    GetPtrLocal       104      |    |
224    IncPopSint32      1        |    |
240    Jmp               -152     |  --+
256    GetPtrLocal       40     <-+
272    LoadPopSint32
280    Destroy           0
296    RetSint32
304    Destroy           0
320    NoRet
```

Added: 
    

Modified: 
    clang/lib/AST/ByteCode/Disasm.cpp
    clang/utils/TableGen/ClangOpcodesEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index 12c434029562d..d4c9ce6050b85 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -33,39 +33,74 @@
 using namespace clang;
 using namespace clang::interp;
 
-template <typename T> inline static T ReadArg(Program &P, CodePtr &OpPC) {
+template <typename T>
+inline static std::string printArg(Program &P, CodePtr &OpPC) {
   if constexpr (std::is_pointer_v<T>) {
     uint32_t ID = OpPC.read<uint32_t>();
-    return reinterpret_cast<T>(P.getNativePointer(ID));
+    std::string Result;
+    llvm::raw_string_ostream SS(Result);
+    SS << reinterpret_cast<T>(P.getNativePointer(ID));
+    return Result;
   } else {
-    return OpPC.read<T>();
+    std::string Result;
+    llvm::raw_string_ostream SS(Result);
+    auto Arg = OpPC.read<T>();
+    SS << Arg;
+    return Result;
   }
 }
 
-template <> inline Floating ReadArg<Floating>(Program &P, CodePtr &OpPC) {
-  Floating F = Floating::deserialize(*OpPC);
+template <> inline std::string printArg<Floating>(Program &P, CodePtr &OpPC) {
+  auto F = Floating::deserialize(*OpPC);
   OpPC += align(F.bytesToSerialize());
-  return F;
+
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
 }
 
 template <>
-inline IntegralAP<false> ReadArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
-  IntegralAP<false> I = IntegralAP<false>::deserialize(*OpPC);
-  OpPC += align(I.bytesToSerialize());
-  return I;
-}
+inline std::string printArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
+  auto F = IntegralAP<false>::deserialize(*OpPC);
+  OpPC += align(F.bytesToSerialize());
 
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
+}
 template <>
-inline IntegralAP<true> ReadArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
-  IntegralAP<true> I = IntegralAP<true>::deserialize(*OpPC);
-  OpPC += align(I.bytesToSerialize());
-  return I;
+inline std::string printArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
+  auto F = IntegralAP<true>::deserialize(*OpPC);
+  OpPC += align(F.bytesToSerialize());
+
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
 }
 
-template <> inline FixedPoint ReadArg<FixedPoint>(Program &P, CodePtr &OpPC) {
-  FixedPoint I = FixedPoint::deserialize(*OpPC);
-  OpPC += align(I.bytesToSerialize());
-  return I;
+template <> inline std::string printArg<FixedPoint>(Program &P, CodePtr &OpPC) {
+  auto F = FixedPoint::deserialize(*OpPC);
+  OpPC += align(F.bytesToSerialize());
+
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
+}
+
+static bool isJumpOpcode(Opcode Op) {
+  return Op == OP_Jmp || Op == OP_Jf || Op == OP_Jt;
+}
+
+static size_t getNumDisplayWidth(size_t N) {
+  unsigned L = 1u, M = 10u;
+  while (M <= N && ++L != std::numeric_limits<size_t>::digits10 + 1)
+    M *= 10u;
+
+  return L;
 }
 
 LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
@@ -80,23 +115,115 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
   OS << "rvo:        " << hasRVO() << "\n";
   OS << "this arg:   " << hasThisPointer() << "\n";
 
-  auto PrintName = [&OS](const char *Name) {
-    OS << Name;
-    long N = 30 - strlen(Name);
-    if (N > 0)
-      OS.indent(N);
+  struct OpText {
+    size_t Addr;
+    std::string Op;
+    bool IsJump;
+    llvm::SmallVector<std::string> Args;
   };
 
+  auto PrintName = [](const char *Name) -> std::string {
+    return std::string(Name);
+  };
+
+  llvm::SmallVector<OpText> Code;
+  size_t LongestAddr = 0;
+  size_t LongestOp = 0;
+
   for (CodePtr Start = getCodeBegin(), PC = Start; PC != getCodeEnd();) {
     size_t Addr = PC - Start;
+    OpText Text;
     auto Op = PC.read<Opcode>();
-    OS << llvm::format("%8d", Addr) << " ";
+    Text.Addr = Addr;
+    Text.IsJump = isJumpOpcode(Op);
     switch (Op) {
 #define GET_DISASM
 #include "Opcodes.inc"
 #undef GET_DISASM
     }
+    Code.push_back(Text);
+    LongestOp = std::max(Text.Op.size(), LongestOp);
+    LongestAddr = std::max(getNumDisplayWidth(Addr), LongestAddr);
   }
+
+  // Record jumps and their targets.
+  struct JmpData {
+    size_t From;
+    size_t To;
+  };
+  llvm::SmallVector<JmpData> Jumps;
+  for (auto &Text : Code) {
+    if (Text.IsJump)
+      Jumps.push_back({Text.Addr, Text.Addr + std::stoi(Text.Args[0]) +
+                                      align(sizeof(Opcode)) +
+                                      align(sizeof(int32_t))});
+  }
+
+  llvm::SmallVector<std::string> Text;
+  Text.reserve(Code.size());
+  size_t LongestLine = 0;
+  // Print code to a string, one at a time.
+  for (auto C : Code) {
+    std::string Line;
+    llvm::raw_string_ostream LS(Line);
+    LS << C.Addr;
+    LS.indent(LongestAddr - getNumDisplayWidth(C.Addr) + 4);
+    LS << C.Op;
+    LS.indent(LongestOp - C.Op.size() + 4);
+    for (auto &Arg : C.Args) {
+      LS << Arg << ' ';
+    }
+    Text.push_back(Line);
+    LongestLine = std::max(Line.size(), LongestLine);
+  }
+
+  assert(Code.size() == Text.size());
+
+  auto spaces = [](unsigned N) -> std::string {
+    std::string S;
+    for (unsigned I = 0; I != N; ++I)
+      S += ' ';
+    return S;
+  };
+
+  // Now, draw the jump lines.
+  for (auto &J : Jumps) {
+    if (J.To > J.From) {
+      bool FoundStart = false;
+      for (size_t LineIndex = 0; LineIndex != Text.size(); ++LineIndex) {
+        Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size());
+
+        if (Code[LineIndex].Addr == J.From) {
+          Text[LineIndex] += "  --+";
+          FoundStart = true;
+        } else if (Code[LineIndex].Addr == J.To) {
+          Text[LineIndex] += "  <-+";
+          break;
+        } else if (FoundStart) {
+          Text[LineIndex] += "    |";
+        }
+      }
+      LongestLine += 5;
+    } else {
+      bool FoundStart = false;
+      for (ssize_t LineIndex = Text.size() - 1; LineIndex >= 0; --LineIndex) {
+        Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size());
+        if (Code[LineIndex].Addr == J.From) {
+          Text[LineIndex] += "  --+";
+          FoundStart = true;
+        } else if (Code[LineIndex].Addr == J.To) {
+          Text[LineIndex] += "  <-+";
+          break;
+        } else if (FoundStart) {
+          Text[LineIndex] += "    |";
+        }
+      }
+      LongestLine += 5;
+    }
+  }
+
+  for (auto &Line : Text)
+    OS << Line << '\n';
 }
 
 LLVM_DUMP_METHOD void Program::dump() const { dump(llvm::errs()); }

diff  --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
index 64534a50877ec..5d6d90994cf37 100644
--- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -171,16 +171,12 @@ void ClangOpcodesEmitter::EmitDisasm(raw_ostream &OS, StringRef N,
   OS << "#ifdef GET_DISASM\n";
   Enumerate(R, N, [R, &OS](ArrayRef<const Record *>, const Twine &ID) {
     OS << "case OP_" << ID << ":\n";
-    OS << "  PrintName(\"" << ID << "\");\n";
-    OS << "  OS << \"\\t\"";
+    OS << "  Text.Op = PrintName(\"" << ID << "\");\n";
+    for (const auto *Arg : R->getValueAsListOfDefs("Args"))
+      OS << "  Text.Args.push_back(printArg<" << Arg->getValueAsString("Name")
+         << ">(P, PC));\n";
 
-    for (const auto *Arg : R->getValueAsListOfDefs("Args")) {
-      OS << " << ReadArg<" << Arg->getValueAsString("Name") << ">(P, PC)";
-      OS << " << \" \"";
-    }
-
-    OS << " << \"\\n\";\n";
-    OS << "  continue;\n";
+    OS << "  break;\n";
   });
   OS << "#endif\n";
 }


        


More information about the cfe-commits mailing list