[llvm] 8b655e1 - [TableGen][CodeEmitterGen] Add support for querying operand bit offsets

Thu Jul 20 01:10:56 PDT 2023

Author: Ilya Leoshkevich
Date: 2023-07-20T10:10:45+02:00
New Revision: 8b655e1f0a70aeff26c25798eb1951d6b9b9e236

URL: https://github.com/llvm/llvm-project/commit/8b655e1f0a70aeff26c25798eb1951d6b9b9e236
DIFF: https://github.com/llvm/llvm-project/commit/8b655e1f0a70aeff26c25798eb1951d6b9b9e236.diff

LOG: [TableGen][CodeEmitterGen] Add support for querying operand bit offsets

In order to generate relocations or to apply fixups after the layout
has been computed, the targets need to know the offsets of the
respective operands. There are indirect ways to figure them out in some
cases, for example, on SystemZ, the first memory operand is always at
offset 2, and the second one is always at offset 4. But there are no
such tricks for the immediate operands on SystemZ, so one has to refer
to individual instruction encodings.

This information, however, is available to TableGen. Generate
the getOperandBitOffset() method to access it, and use it to simplify
getting memory operand offsets on SystemZ. This also paves the way for
implementing symbolic immediates on this platform.

For the multi-lit operands, getOperandBitOffset() returns the offset of
the first lit.

An alternative way to obtain offsets would be to pass them to the
encoder methods, but this would require reworking all targets. Also,
VarLenCodeEmitter already does this, but adopting it requires
reworking the respective targets without other significant benefits.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D155329

Added: 
    

Modified: 
    llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
    llvm/utils/TableGen/CodeEmitterGen.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index c52fb7d55c06e1..a0648a077e2bf9 100644

--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -37,8 +37,6 @@ class SystemZMCCodeEmitter : public MCCodeEmitter {
   const MCInstrInfo &MCII;
   MCContext &Ctx;
 
-  mutable unsigned MemOpsEmitted;
-
 public:
   SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
     : MCII(mcii), Ctx(ctx) {
@@ -56,6 +54,8 @@ class SystemZMCCodeEmitter : public MCCodeEmitter {
   uint64_t getBinaryCodeForInstr(const MCInst &MI,
                                  SmallVectorImpl<MCFixup> &Fixups,
                                  const MCSubtargetInfo &STI) const;
+  uint32_t getOperandBitOffset(const MCInst &MI, unsigned OpNum,
+                               const MCSubtargetInfo &STI) const;
 
   // Called by the TableGen code to get the binary encoding of operand
   // MO in MI.  Fixups is the list of fixups against MI.
@@ -67,6 +67,7 @@ class SystemZMCCodeEmitter : public MCCodeEmitter {
   // add a fixup for it and return 0.
   uint64_t getDispOpValue(const MCInst &MI, unsigned OpNum,
                           SmallVectorImpl<MCFixup> &Fixups,
+                          const MCSubtargetInfo &STI, unsigned OpSize,
                           SystemZ::FixupKind Kind) const;
 
   // Called by the TableGen code to get the binary encoding of an address.
@@ -144,7 +145,6 @@ void SystemZMCCodeEmitter::encodeInstruction(const MCInst &MI,
                                              SmallVectorImpl<char> &CB,
                                              SmallVectorImpl<MCFixup> &Fixups,
                                              const MCSubtargetInfo &STI) const {
-  MemOpsEmitted = 0;
   uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
   unsigned Size = MCII.get(MI.getOpcode()).getSize();
   // Big-endian insertion of Size bytes.
@@ -166,21 +166,20 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
   llvm_unreachable("Unexpected operand type!");
 }
 
-uint64_t SystemZMCCodeEmitter::
-getDispOpValue(const MCInst &MI, unsigned OpNum,
-               SmallVectorImpl<MCFixup> &Fixups,
-               SystemZ::FixupKind Kind) const {
+uint64_t SystemZMCCodeEmitter::getDispOpValue(const MCInst &MI, unsigned OpNum,
+                                              SmallVectorImpl<MCFixup> &Fixups,
+                                              const MCSubtargetInfo &STI,
+                                              unsigned OpSize,
+                                              SystemZ::FixupKind Kind) const {
   const MCOperand &MO = MI.getOperand(OpNum);
-  if (MO.isImm()) {
-    ++MemOpsEmitted;
+  if (MO.isImm())
     return static_cast<uint64_t>(MO.getImm());
-  }
   if (MO.isExpr()) {
-    // All instructions follow the pattern where the first displacement has a
-    // 2 bytes offset, and the second one 4 bytes.
-    unsigned ByteOffs = MemOpsEmitted++ == 0 ? 2 : 4;
-    Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind,
-                                     MI.getLoc()));
+    unsigned MIBitSize = MCII.get(MI.getOpcode()).getSize() * 8;
+    uint32_t RawBitOffset = getOperandBitOffset(MI, OpNum, STI);
+    uint32_t BitOffset = MIBitSize - RawBitOffset - OpSize;
+    Fixups.push_back(MCFixup::create(BitOffset >> 3, MO.getExpr(),
+                                     (MCFixupKind)Kind, MI.getLoc()));
     assert(Fixups.size() <= 2 && "More than two memory operands in MI?");
     return 0;
   }
@@ -199,14 +198,16 @@ uint64_t
 SystemZMCCodeEmitter::getDisp12Encoding(const MCInst &MI, unsigned OpNum,
                                         SmallVectorImpl<MCFixup> &Fixups,
                                         const MCSubtargetInfo &STI) const {
-  return getDispOpValue(MI, OpNum, Fixups, SystemZ::FixupKind::FK_390_12);
+  return getDispOpValue(MI, OpNum, Fixups, STI, 12,
+                        SystemZ::FixupKind::FK_390_12);
 }
 
 uint64_t
 SystemZMCCodeEmitter::getDisp20Encoding(const MCInst &MI, unsigned OpNum,
                                         SmallVectorImpl<MCFixup> &Fixups,
                                         const MCSubtargetInfo &STI) const {
-  return getDispOpValue(MI, OpNum, Fixups, SystemZ::FixupKind::FK_390_20);
+  return getDispOpValue(MI, OpNum, Fixups, STI, 20,
+                        SystemZ::FixupKind::FK_390_20);
 }
 
 uint64_t
@@ -241,6 +242,7 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
   return 0;
 }
 
+#define GET_OPERAND_BIT_OFFSET
 #include "SystemZGenMCCodeEmitter.inc"
 
 MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,

diff  --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index cf42c3c4b0fd32..ba077c3c616065 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -7,8 +7,18 @@
 //===----------------------------------------------------------------------===//
 //
 // CodeEmitterGen uses the descriptions of instructions and their fields to
-// construct an automated code emitter: a function that, given a MachineInstr,
-// returns the (currently, 32-bit unsigned) value of the instruction.
+// construct an automated code emitter: a function called
+// getBinaryCodeForInstr() that, given an MCInst, returns the value of the
+// instruction - either as a uint64_t or as an APInt, depending on the
+// maximum bit width of all Inst definitions.
+//
+// In addition, it generates another function called getOperandBitOffset()
+// that, given a MCInst and an operand index, returns the minimum of indices of
+// all bits that carry some portion of the respective operand. When the target's
+// encodeInstruction() stores the instruction in a little-endian byte order, the
+// returned value is the offset of the start of the operand in the encoded
+// instruction. Other targets might need to adjust the returned value according
+// to their encodeInstruction() implementation.
 //
 //===----------------------------------------------------------------------===//
 
@@ -46,16 +56,22 @@ class CodeEmitterGen {
 
 private:
   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
-  std::string getInstructionCase(Record *R, CodeGenTarget &Target);
-  std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
-                                            CodeGenTarget &Target);
+  std::pair<std::string, std::string>
+  getInstructionCases(Record *R, CodeGenTarget &Target);
+  void addInstructionCasesForEncoding(Record *R, Record *EncodingDef,
+                                      CodeGenTarget &Target, std::string &Case,
+                                      std::string &BitOffsetCase);
   bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
-                               const std::string &VarName,
-                               std::string &Case, CodeGenTarget &Target);
+                               const std::string &VarName, std::string &Case,
+                               std::string &BitOffsetCase,
+                               CodeGenTarget &Target);
 
   void emitInstructionBaseValues(
       raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
       CodeGenTarget &Target, int HwMode = -1);
+  void
+  emitCaseMap(raw_ostream &o,
+              const std::map<std::string, std::vector<std::string>> &CaseMap);
   unsigned BitWidth = 0u;
   bool UseAPInt = false;
 };
@@ -80,6 +96,7 @@ int CodeEmitterGen::getVariableBit(const std::string &VarName,
 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
                                              const std::string &VarName,
                                              std::string &Case,
+                                             std::string &BitOffsetCase,
                                              CodeGenTarget &Target) {
   CodeGenInstruction &CGI = Target.getInstruction(R);
 
@@ -175,6 +192,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
     ++numOperandLits;
   }
 
+  unsigned BitOffset = -1;
   for (; bit >= 0; ) {
     int varBit = getVariableBit(VarName, BI, bit);
     
@@ -183,7 +201,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
       --bit;
       continue;
     }
-    
+
     // Figure out the consecutive range of bits covered by this operand, in
     // order to generate better encoding code.
     int beginInstBit = bit;
@@ -202,6 +220,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
     unsigned loBit = beginVarBit - N + 1;
     unsigned hiBit = loBit + N;
     unsigned loInstBit = beginInstBit - N + 1;
+    BitOffset = loInstBit;
     if (UseAPInt) {
       std::string extractStr;
       if (N >= 64) {
@@ -243,49 +262,65 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
       }
     }
   }
+
+  if (BitOffset != (unsigned)-1) {
+    BitOffsetCase += "      case " + utostr(OpIdx) + ":\n";
+    BitOffsetCase += "        // op: " + VarName + "\n";
+    BitOffsetCase += "        return " + utostr(BitOffset) + ";\n";
+  }
+
   return true;
 }
 
-std::string CodeEmitterGen::getInstructionCase(Record *R,
-                                               CodeGenTarget &Target) {
-  std::string Case;
+std::pair<std::string, std::string>
+CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
+  std::string Case, BitOffsetCase;
+
+  auto append = [&](const char *S) {
+    Case += S;
+    BitOffsetCase += S;
+  };
+
   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
       const CodeGenHwModes &HWM = Target.getHwModes();
       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
-      Case += "      switch (HwMode) {\n";
-      Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
+      append("      switch (HwMode) {\n");
+      append("      default: llvm_unreachable(\"Unhandled HwMode\");\n");
       for (auto &KV : EBM) {
-        Case += "      case " + itostr(KV.first) + ": {\n";
-        Case += getInstructionCaseForEncoding(R, KV.second, Target);
-        Case += "      break;\n";
-        Case += "      }\n";
+        append(("      case " + itostr(KV.first) + ": {\n").c_str());
+        addInstructionCasesForEncoding(R, KV.second, Target, Case,
+                                       BitOffsetCase);
+        append("      break;\n");
+        append("      }\n");
       }
-      Case += "      }\n";
-      return Case;
+      append("      }\n");
+      return std::make_pair(std::move(Case), std::move(BitOffsetCase));
     }
   }
-  return getInstructionCaseForEncoding(R, R, Target);
+  addInstructionCasesForEncoding(R, R, Target, Case, BitOffsetCase);
+  return std::make_pair(std::move(Case), std::move(BitOffsetCase));
 }
 
-std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
-                                                          CodeGenTarget &Target) {
-  std::string Case;
+void CodeEmitterGen::addInstructionCasesForEncoding(
+    Record *R, Record *EncodingDef, CodeGenTarget &Target, std::string &Case,
+    std::string &BitOffsetCase) {
   BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
 
   // Loop over all of the fields in the instruction, determining which are the
   // operands to the instruction.
   bool Success = true;
+  BitOffsetCase += "      switch (OpNum) {\n";
   for (const RecordVal &RV : EncodingDef->getValues()) {
     // Ignore fixed fields in the record, we're looking for values like:
     //    bits<5> RST = { ?, ?, ?, ?, ? };
     if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
       continue;
 
-    Success &=
-        addCodeToMergeInOperand(R, BI, std::string(RV.getName()),
-                                Case, Target);
+    Success &= addCodeToMergeInOperand(R, BI, std::string(RV.getName()), Case,
+                                       BitOffsetCase, Target);
   }
+  BitOffsetCase += "      }\n";
 
   if (!Success) {
     // Dump the record, so we can see what's going on...
@@ -304,8 +339,6 @@ std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *Enc
     Case += ", STI";
     Case += ");\n";
   }
-  
-  return Case;
 }
 
 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
@@ -356,6 +389,26 @@ void CodeEmitterGen::emitInstructionBaseValues(
   o << "    UINT64_C(0)\n  };\n";
 }
 
+void CodeEmitterGen::emitCaseMap(
+    raw_ostream &o,
+    const std::map<std::string, std::vector<std::string>> &CaseMap) {
+  std::map<std::string, std::vector<std::string>>::const_iterator IE, EE;
+  for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
+    const std::string &Case = IE->first;
+    const std::vector<std::string> &InstList = IE->second;
+
+    for (int i = 0, N = InstList.size(); i < N; i++) {
+      if (i)
+        o << "\n";
+      o << "    case " << InstList[i] << ":";
+    }
+    o << " {\n";
+    o << Case;
+    o << "      break;\n"
+      << "    }\n";
+  }
+}
+
 void CodeEmitterGen::run(raw_ostream &o) {
   emitSourceFileHeader("Machine Code Emitter", o);
 
@@ -437,6 +490,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
 
     // Map to accumulate all the cases.
     std::map<std::string, std::vector<std::string>> CaseMap;
+    std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;
 
     // Construct all cases statement for each opcode
     for (Record *R : Insts) {
@@ -445,9 +499,11 @@ void CodeEmitterGen::run(raw_ostream &o) {
         continue;
       std::string InstName =
           (R->getValueAsString("Namespace") + "::" + R->getName()).str();
-      std::string Case = getInstructionCase(R, Target);
+      std::string Case, BitOffsetCase;
+      std::tie(Case, BitOffsetCase) = getInstructionCases(R, Target);
 
-      CaseMap[Case].push_back(std::move(InstName));
+      CaseMap[Case].push_back(InstName);
+      BitOffsetCaseMap[BitOffsetCase].push_back(std::move(InstName));
     }
 
     // Emit initial function code
@@ -470,21 +526,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
     }
 
     // Emit each case statement
-    std::map<std::string, std::vector<std::string>>::iterator IE, EE;
-    for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
-      const std::string &Case = IE->first;
-      std::vector<std::string> &InstList = IE->second;
-
-      for (int i = 0, N = InstList.size(); i < N; i++) {
-        if (i)
-          o << "\n";
-        o << "    case " << InstList[i] << ":";
-      }
-      o << " {\n";
-      o << Case;
-      o << "      break;\n"
-        << "    }\n";
-    }
+    emitCaseMap(o, CaseMap);
 
     // Default case: unhandled opcode
     o << "  default:\n"
@@ -498,6 +540,23 @@ void CodeEmitterGen::run(raw_ostream &o) {
     else
       o << "  return Value;\n";
     o << "}\n\n";
+
+    o << "#ifdef GET_OPERAND_BIT_OFFSET\n"
+      << "#undef GET_OPERAND_BIT_OFFSET\n\n"
+      << "uint32_t " << Target.getName()
+      << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
+      << "    unsigned OpNum,\n"
+      << "    const MCSubtargetInfo &STI) const {\n"
+      << "  switch (MI.getOpcode()) {\n";
+    emitCaseMap(o, BitOffsetCaseMap);
+    o << "  }\n"
+      << "  std::string msg;\n"
+      << "  raw_string_ostream Msg(msg);\n"
+      << "  Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
+         "<< \"]\";\n"
+      << "  report_fatal_error(Msg.str().c_str());\n"
+      << "}\n\n"
+      << "#endif // GET_OPERAND_BIT_OFFSET\n\n";
   }
 }