[llvm] [TableGen] Extract InstructionEncoding class into a separate file (NFC) (PR #158505)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 14 12:16:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-tablegen
Author: Sergei Barannikov (s-barannikov)
<details>
<summary>Changes</summary>
The class will be used to simplify CodeEmitterGen.
---
Patch is 44.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158505.diff
4 Files Affected:
- (modified) llvm/utils/TableGen/Common/CMakeLists.txt (+1)
- (added) llvm/utils/TableGen/Common/InstructionEncoding.cpp (+429)
- (added) llvm/utils/TableGen/Common/InstructionEncoding.h (+150)
- (modified) llvm/utils/TableGen/DecoderEmitter.cpp (+14-535)
``````````diff
diff --git a/llvm/utils/TableGen/Common/CMakeLists.txt b/llvm/utils/TableGen/Common/CMakeLists.txt
index 7342156980f35..66279a3ed3755 100644
--- a/llvm/utils/TableGen/Common/CMakeLists.txt
+++ b/llvm/utils/TableGen/Common/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_library(LLVMTableGenCommon STATIC OBJECT EXCLUDE_FROM_ALL DISABLE_LLVM_
CodeGenTarget.cpp
DAGISelMatcher.cpp
InfoByHwMode.cpp
+ InstructionEncoding.cpp
OptEmitter.cpp
PredicateExpander.cpp
SubtargetFeatureInfo.cpp
diff --git a/llvm/utils/TableGen/Common/InstructionEncoding.cpp b/llvm/utils/TableGen/Common/InstructionEncoding.cpp
new file mode 100644
index 0000000000000..22163e1898333
--- /dev/null
+++ b/llvm/utils/TableGen/Common/InstructionEncoding.cpp
@@ -0,0 +1,429 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstructionEncoding.h"
+#include "CodeGenInstruction.h"
+#include "VarLenCodeEmitterGen.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/TableGen/Error.h"
+
+using namespace llvm;
+
+static std::string findOperandDecoderMethod(const Record *Record) {
+ std::string Decoder;
+
+ const RecordVal *DecoderString = Record->getValue("DecoderMethod");
+ const StringInit *String =
+ DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
+ if (String) {
+ Decoder = String->getValue().str();
+ if (!Decoder.empty())
+ return Decoder;
+ }
+
+ if (Record->isSubClassOf("RegisterOperand"))
+ // Allows use of a DecoderMethod in referenced RegisterClass if set.
+ return findOperandDecoderMethod(Record->getValueAsDef("RegClass"));
+
+ if (Record->isSubClassOf("RegisterClass")) {
+ Decoder = "Decode" + Record->getName().str() + "RegisterClass";
+ } else if (Record->isSubClassOf("PointerLikeRegClass")) {
+ Decoder = "DecodePointerLikeRegClass" +
+ utostr(Record->getValueAsInt("RegClassKind"));
+ }
+
+ return Decoder;
+}
+
+static OperandInfo getOpInfo(const Record *TypeRecord) {
+ const RecordVal *HasCompleteDecoderVal =
+ TypeRecord->getValue("hasCompleteDecoder");
+ const BitInit *HasCompleteDecoderBit =
+ HasCompleteDecoderVal
+ ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+ : nullptr;
+ bool HasCompleteDecoder =
+ HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+ return OperandInfo(findOperandDecoderMethod(TypeRecord), HasCompleteDecoder);
+}
+
+void InstructionEncoding::parseVarLenEncoding(const VarLenInst &VLI) {
+ InstBits = KnownBits(VLI.size());
+ SoftFailMask = APInt(VLI.size(), 0);
+
+ // Parse Inst field.
+ unsigned I = 0;
+ for (const EncodingSegment &S : VLI) {
+ if (const auto *SegmentBits = dyn_cast<BitsInit>(S.Value)) {
+ for (const Init *V : SegmentBits->getBits()) {
+ if (const auto *B = dyn_cast<BitInit>(V)) {
+ if (B->getValue())
+ InstBits.One.setBit(I);
+ else
+ InstBits.Zero.setBit(I);
+ }
+ ++I;
+ }
+ } else if (const auto *B = dyn_cast<BitInit>(S.Value)) {
+ if (B->getValue())
+ InstBits.One.setBit(I);
+ else
+ InstBits.Zero.setBit(I);
+ ++I;
+ } else {
+ I += S.BitWidth;
+ }
+ }
+ assert(I == VLI.size());
+}
+
+void InstructionEncoding::parseFixedLenEncoding(
+ const BitsInit &RecordInstBits) {
+ // For fixed length instructions, sometimes the `Inst` field specifies more
+ // bits than the actual size of the instruction, which is specified in `Size`.
+ // In such cases, we do some basic validation and drop the upper bits.
+ unsigned BitWidth = EncodingDef->getValueAsInt("Size") * 8;
+ unsigned InstNumBits = RecordInstBits.getNumBits();
+
+ // Returns true if all bits in `Bits` are zero or unset.
+ auto CheckAllZeroOrUnset = [&](ArrayRef<const Init *> Bits,
+ const RecordVal *Field) {
+ bool AllZeroOrUnset = llvm::all_of(Bits, [](const Init *Bit) {
+ if (const auto *BI = dyn_cast<BitInit>(Bit))
+ return !BI->getValue();
+ return isa<UnsetInit>(Bit);
+ });
+ if (AllZeroOrUnset)
+ return;
+ PrintNote([Field](raw_ostream &OS) { Field->print(OS); });
+ PrintFatalError(EncodingDef, Twine(Name) + ": Size is " + Twine(BitWidth) +
+ " bits, but " + Field->getName() +
+ " bits beyond that are not zero/unset");
+ };
+
+ if (InstNumBits < BitWidth)
+ PrintFatalError(EncodingDef, Twine(Name) + ": Size is " + Twine(BitWidth) +
+ " bits, but Inst specifies only " +
+ Twine(InstNumBits) + " bits");
+
+ if (InstNumBits > BitWidth) {
+ // Ensure that all the bits beyond 'Size' are 0 or unset (i.e., carry no
+ // actual encoding).
+ ArrayRef<const Init *> UpperBits =
+ RecordInstBits.getBits().drop_front(BitWidth);
+ const RecordVal *InstField = EncodingDef->getValue("Inst");
+ CheckAllZeroOrUnset(UpperBits, InstField);
+ }
+
+ ArrayRef<const Init *> ActiveInstBits =
+ RecordInstBits.getBits().take_front(BitWidth);
+ InstBits = KnownBits(BitWidth);
+ SoftFailMask = APInt(BitWidth, 0);
+
+ // Parse Inst field.
+ for (auto [I, V] : enumerate(ActiveInstBits)) {
+ if (const auto *B = dyn_cast<BitInit>(V)) {
+ if (B->getValue())
+ InstBits.One.setBit(I);
+ else
+ InstBits.Zero.setBit(I);
+ }
+ }
+
+ // Parse SoftFail field.
+ const RecordVal *SoftFailField = EncodingDef->getValue("SoftFail");
+ if (!SoftFailField)
+ return;
+
+ const auto *SFBits = dyn_cast<BitsInit>(SoftFailField->getValue());
+ if (!SFBits || SFBits->getNumBits() != InstNumBits) {
+ PrintNote(EncodingDef->getLoc(), "in record");
+ PrintFatalError(SoftFailField,
+ formatv("SoftFail field, if defined, must be "
+ "of the same type as Inst, which is bits<{}>",
+ InstNumBits));
+ }
+
+ if (InstNumBits > BitWidth) {
+ // Ensure that all upper bits of `SoftFail` are 0 or unset.
+ ArrayRef<const Init *> UpperBits = SFBits->getBits().drop_front(BitWidth);
+ CheckAllZeroOrUnset(UpperBits, SoftFailField);
+ }
+
+ ArrayRef<const Init *> ActiveSFBits = SFBits->getBits().take_front(BitWidth);
+ for (auto [I, V] : enumerate(ActiveSFBits)) {
+ if (const auto *B = dyn_cast<BitInit>(V); B && B->getValue()) {
+ if (!InstBits.Zero[I] && !InstBits.One[I]) {
+ PrintNote(EncodingDef->getLoc(), "in record");
+ PrintError(SoftFailField,
+ formatv("SoftFail{{{0}} = 1 requires Inst{{{0}} "
+ "to be fully defined (0 or 1, not '?')",
+ I));
+ }
+ SoftFailMask.setBit(I);
+ }
+ }
+}
+
+void InstructionEncoding::parseVarLenOperands(const VarLenInst &VLI) {
+ SmallVector<int> TiedTo;
+
+ for (const auto &[Idx, Op] : enumerate(Inst->Operands)) {
+ if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
+ for (auto *Arg : Op.MIOperandInfo->getArgs())
+ Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
+ else
+ Operands.push_back(getOpInfo(Op.Rec));
+
+ int TiedReg = Op.getTiedRegister();
+ TiedTo.push_back(-1);
+ if (TiedReg != -1) {
+ TiedTo[Idx] = TiedReg;
+ TiedTo[TiedReg] = Idx;
+ }
+ }
+
+ unsigned CurrBitPos = 0;
+ for (const auto &EncodingSegment : VLI) {
+ unsigned Offset = 0;
+ StringRef OpName;
+
+ if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
+ OpName = SI->getValue();
+ } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
+ OpName = cast<StringInit>(DI->getArg(0))->getValue();
+ Offset = cast<IntInit>(DI->getArg(2))->getValue();
+ }
+
+ if (!OpName.empty()) {
+ auto OpSubOpPair = Inst->Operands.parseOperandName(OpName);
+ unsigned OpIdx = Inst->Operands.getFlattenedOperandNumber(OpSubOpPair);
+ Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+ if (!EncodingSegment.CustomDecoder.empty())
+ Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
+
+ int TiedReg = TiedTo[OpSubOpPair.first];
+ if (TiedReg != -1) {
+ unsigned OpIdx = Inst->Operands.getFlattenedOperandNumber(
+ {TiedReg, OpSubOpPair.second});
+ Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+ }
+ }
+
+ CurrBitPos += EncodingSegment.BitWidth;
+ }
+}
+
+static void debugDumpRecord(const Record &Rec) {
+ // Dump the record, so we can see what's going on.
+ PrintNote([&Rec](raw_ostream &OS) {
+ OS << "Dumping record for previous error:\n";
+ OS << Rec;
+ });
+}
+
+/// For an operand field named OpName: populate OpInfo.InitValue with the
+/// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
+/// insert from the decoded instruction.
+static void addOneOperandFields(const Record *EncodingDef,
+ const BitsInit &InstBits,
+ std::map<StringRef, StringRef> &TiedNames,
+ const Record *OpRec, StringRef OpName,
+ OperandInfo &OpInfo) {
+ OpInfo.Name = OpName;
+
+ // Find a field with the operand's name.
+ const RecordVal *OpEncodingField = EncodingDef->getValue(OpName);
+
+ // If there is no such field, try tied operand's name.
+ if (!OpEncodingField) {
+ if (auto I = TiedNames.find(OpName); I != TiedNames.end())
+ OpEncodingField = EncodingDef->getValue(I->second);
+
+ // If still no luck, we're done with this operand.
+ if (!OpEncodingField) {
+ OpInfo.HasNoEncoding = true;
+ return;
+ }
+ }
+
+ // Some or all bits of the operand may be required to be 0 or 1 depending
+ // on the instruction's encoding. Collect those bits.
+ if (const auto *OpBit = dyn_cast<BitInit>(OpEncodingField->getValue())) {
+ OpInfo.InitValue = OpBit->getValue();
+ return;
+ }
+ if (const auto *OpBits = dyn_cast<BitsInit>(OpEncodingField->getValue())) {
+ if (OpBits->getNumBits() == 0) {
+ if (OpInfo.Decoder.empty()) {
+ PrintError(EncodingDef->getLoc(), "operand '" + OpName + "' of type '" +
+ OpRec->getName() +
+ "' must have a decoder method");
+ }
+ return;
+ }
+ for (unsigned I = 0; I < OpBits->getNumBits(); ++I) {
+ if (const auto *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
+ OpInfo.InitValue = OpInfo.InitValue.value_or(0) |
+ static_cast<uint64_t>(OpBit->getValue()) << I;
+ }
+ }
+
+ // Find out where the variable bits of the operand are encoded. The bits don't
+ // have to be consecutive or in ascending order. For example, an operand could
+ // be encoded as follows:
+ //
+ // 7 6 5 4 3 2 1 0
+ // {1, op{5}, op{2}, op{1}, 0, op{4}, op{3}, ?}
+ //
+ // In this example the operand is encoded in three segments:
+ //
+ // Base Width Offset
+ // op{2...1} 4 2 1
+ // op{4...3} 1 2 3
+ // op{5} 6 1 5
+ //
+ for (unsigned I = 0, J = 0; I != InstBits.getNumBits(); I = J) {
+ const VarInit *Var;
+ unsigned Offset = 0;
+ for (; J != InstBits.getNumBits(); ++J) {
+ const Init *BitJ = InstBits.getBit(J);
+ if (const auto *VBI = dyn_cast<VarBitInit>(BitJ)) {
+ Var = dyn_cast<VarInit>(VBI->getBitVar());
+ if (I == J)
+ Offset = VBI->getBitNum();
+ else if (VBI->getBitNum() != Offset + J - I)
+ break;
+ } else {
+ Var = dyn_cast<VarInit>(BitJ);
+ }
+ if (!Var ||
+ (Var->getName() != OpName && Var->getName() != TiedNames[OpName]))
+ break;
+ }
+ if (I == J)
+ ++J;
+ else
+ OpInfo.addField(I, J - I, Offset);
+ }
+}
+
+void InstructionEncoding::parseFixedLenOperands(const BitsInit &Bits) {
+ // Search for tied operands, so that we can correctly instantiate
+ // operands that are not explicitly represented in the encoding.
+ std::map<StringRef, StringRef> TiedNames;
+ for (const auto &Op : Inst->Operands) {
+ for (const auto &[J, CI] : enumerate(Op.Constraints)) {
+ if (!CI.isTied())
+ continue;
+ std::pair<unsigned, unsigned> SO =
+ Inst->Operands.getSubOperandNumber(CI.getTiedOperand());
+ StringRef TiedName = Inst->Operands[SO.first].SubOpNames[SO.second];
+ if (TiedName.empty())
+ TiedName = Inst->Operands[SO.first].Name;
+ StringRef MyName = Op.SubOpNames[J];
+ if (MyName.empty())
+ MyName = Op.Name;
+
+ TiedNames[MyName] = TiedName;
+ TiedNames[TiedName] = MyName;
+ }
+ }
+
+ // For each operand, see if we can figure out where it is encoded.
+ for (const CGIOperandList::OperandInfo &Op : Inst->Operands) {
+ // Lookup the decoder method and construct a new OperandInfo to hold our
+ // result.
+ OperandInfo OpInfo = getOpInfo(Op.Rec);
+
+ // If we have named sub-operands...
+ if (Op.MIOperandInfo && !Op.SubOpNames[0].empty()) {
+ // Then there should not be a custom decoder specified on the top-level
+ // type.
+ if (!OpInfo.Decoder.empty()) {
+ PrintError(EncodingDef,
+ "DecoderEmitter: operand \"" + Op.Name + "\" has type \"" +
+ Op.Rec->getName() +
+ "\" with a custom DecoderMethod, but also named "
+ "sub-operands.");
+ continue;
+ }
+
+ // Decode each of the sub-ops separately.
+ for (auto [SubOpName, SubOp] :
+ zip_equal(Op.SubOpNames, Op.MIOperandInfo->getArgs())) {
+ const Record *SubOpRec = cast<DefInit>(SubOp)->getDef();
+ OperandInfo SubOpInfo = getOpInfo(SubOpRec);
+ addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpRec, SubOpName,
+ SubOpInfo);
+ Operands.push_back(std::move(SubOpInfo));
+ }
+ continue;
+ }
+
+ // Otherwise, if we have an operand with sub-operands, but they aren't
+ // named...
+ if (Op.MIOperandInfo && OpInfo.Decoder.empty()) {
+ // If we have sub-ops, we'd better have a custom decoder.
+ // (Otherwise we don't know how to populate them properly...)
+ if (Op.MIOperandInfo->getNumArgs()) {
+ PrintError(EncodingDef,
+ "DecoderEmitter: operand \"" + Op.Name +
+ "\" has non-empty MIOperandInfo, but doesn't "
+ "have a custom decoder!");
+ debugDumpRecord(*EncodingDef);
+ continue;
+ }
+ }
+
+ addOneOperandFields(EncodingDef, Bits, TiedNames, Op.Rec, Op.Name, OpInfo);
+ Operands.push_back(std::move(OpInfo));
+ }
+}
+
+InstructionEncoding::InstructionEncoding(const Record *EncodingDef,
+ const CodeGenInstruction *Inst)
+ : EncodingDef(EncodingDef), Inst(Inst) {
+ const Record *InstDef = Inst->TheDef;
+
+ // Give this encoding a name.
+ if (EncodingDef != InstDef)
+ Name = (EncodingDef->getName() + Twine(':')).str();
+ Name.append(InstDef->getName());
+
+ DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
+ DecoderMethod = EncodingDef->getValueAsString("DecoderMethod");
+ if (!DecoderMethod.empty())
+ HasCompleteDecoder = EncodingDef->getValueAsBit("hasCompleteDecoder");
+
+ const RecordVal *InstField = EncodingDef->getValue("Inst");
+ if (const auto *DI = dyn_cast<DagInit>(InstField->getValue())) {
+ VarLenInst VLI(DI, InstField);
+ parseVarLenEncoding(VLI);
+ // If the encoding has a custom decoder, don't bother parsing the operands.
+ if (DecoderMethod.empty())
+ parseVarLenOperands(VLI);
+ } else {
+ const auto *BI = cast<BitsInit>(InstField->getValue());
+ parseFixedLenEncoding(*BI);
+ // If the encoding has a custom decoder, don't bother parsing the operands.
+ if (DecoderMethod.empty())
+ parseFixedLenOperands(*BI);
+ }
+
+ if (DecoderMethod.empty()) {
+ // A generated decoder is always successful if none of the operand
+ // decoders can fail (all are always successful).
+ HasCompleteDecoder = all_of(Operands, [](const OperandInfo &Op) {
+ // By default, a generated operand decoder is assumed to always succeed.
+ // This can be overridden by the user.
+ return Op.Decoder.empty() || Op.HasCompleteDecoder;
+ });
+ }
+}
diff --git a/llvm/utils/TableGen/Common/InstructionEncoding.h b/llvm/utils/TableGen/Common/InstructionEncoding.h
new file mode 100644
index 0000000000000..2fd195e17321a
--- /dev/null
+++ b/llvm/utils/TableGen/Common/InstructionEncoding.h
@@ -0,0 +1,150 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_INSTRUCTIONENCODING_H
+#define LLVM_UTILS_TABLEGEN_INSTRUCTIONENCODING_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/KnownBits.h"
+#include <optional>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class BitsInit;
+class CodeGenInstruction;
+class Record;
+class RecordVal;
+class VarLenInst;
+
+// Represents a span of bits in the instruction encoding that's based on a span
+// of bits in an operand's encoding.
+//
+// Width is the width of the span.
+// Base is the starting position of that span in the instruction encoding.
+// Offset if the starting position of that span in the operand's encoding.
+// That is, bits {Base + Width - 1, Base} in the instruction encoding form
+// bits {Offset + Width - 1, Offset} in the operands encoding.
+struct EncodingField {
+ unsigned Base, Width, Offset;
+ EncodingField(unsigned B, unsigned W, unsigned O)
+ : Base(B), Width(W), Offset(O) {}
+};
+
+struct OperandInfo {
+ StringRef Name;
+ bool HasNoEncoding = false;
+ std::vector<EncodingField> Fields;
+ std::string Decoder;
+ bool HasCompleteDecoder;
+ std::optional<uint64_t> InitValue;
+
+ OperandInfo(std::string D, bool HCD) : Decoder(D), HasCompleteDecoder(HCD) {}
+
+ void addField(unsigned Base, unsigned Width, unsigned Offset) {
+ Fields.emplace_back(Base, Width, Offset);
+ }
+
+ ArrayRef<EncodingField> fields() const { return Fields; }
+};
+
+/// Represents a parsed InstructionEncoding record or a record derived from it.
+class InstructionEncoding {
+ /// The Record this encoding originates from.
+ const Record *EncodingDef;
+
+ /// The instruction this encoding is for.
+ const CodeGenInstruction *Inst;
+
+ /// The name of this encoding (for debugging purposes).
+ std::string Name;
+
+ /// The namespace in which this encoding exists.
+ StringRef DecoderNamespace;
+
+ /// Known bits of this encoding. This is the value of the `Inst` field
+ /// with any variable references replaced with '?'.
+ KnownBits InstBits;
+
+ /// Mask of bits that should be considered unknown during decoding.
+ /// This is the value of the `SoftFail` field.
+ APInt SoftFailMask;
+
+ /// The name of the function to use for decoding. May be an empty string,
+ /// meaning the decoder is generated.
+ StringRef DecoderMethod;
+
+ /// Whether the custom decoding function always succeeds. If a custom decoder
+ /// function is specified, the value is taken from the target description,
+ /// otherwise it is inferred.
+ bool HasCompleteDecoder;
+
+ /// Information about the operands' contribution to this encoding.
+ SmallVector<OperandInfo, 16> Operands;
+
+public:
+ InstructionEncoding(const Record *EncodingDef,
+ const CodeGenInstruction *Inst);
+
+ /// Returns the Record this encoding originates from.
+ const Record *getRecord() const { return Encodin...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/158505
More information about the llvm-commits
mailing list