[llvm] a538d1f - [TableGen][CodeEmitterGen] Allow local names for sub-operands in a operand list.

Sat Sep 24 06:41:31 PDT 2022

Author: James Y Knight
Date: 2022-09-24T09:40:44-04:00
New Revision: a538d1f13a1375c212c7018e36deab2cd896bf6e

URL: https://github.com/llvm/llvm-project/commit/a538d1f13a1375c212c7018e36deab2cd896bf6e
DIFF: https://github.com/llvm/llvm-project/commit/a538d1f13a1375c212c7018e36deab2cd896bf6e.diff

LOG: [TableGen][CodeEmitterGen] Allow local names for sub-operands in a operand list.

These names can then be matched by name against 'bits' fields in a
record, to populate an instruction's encoding.

This does _not_ yet change DecoderEmitter to allow by-name matching of
sub-operands. Unlike the encoder, the decoder already defaulted to not
supporting positional matching, and backends had workarounds in place
for the missing decoding support.

Additionally, use this new capability to allow the ARM and AArch64
backends not to require any positional operand matching.

Differential Revision: https://reviews.llvm.org/D131003

Added: 
    

Modified: 
    llvm/lib/TableGen/Record.cpp
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/ARM/ARMInstrThumb2.td
    llvm/utils/TableGen/CodeEmitterGen.cpp
    llvm/utils/TableGen/CodeGenInstruction.cpp
    llvm/utils/TableGen/CodeGenInstruction.h
    llvm/utils/TableGen/DecoderEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 6b899a049e6b0..053322ed0ca52 100644

--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -2238,6 +2238,7 @@ static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
 
 DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
                       ArrayRef<StringInit *> NameRange) {
+  assert(ArgRange.size() == NameRange.size());
   FoldingSetNodeID ID;
   ProfileDagInit(ID, V, VN, ArgRange, NameRange);
 

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index da945ca7cb0cd..d5d007ceea249 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2483,27 +2483,23 @@ class BaseAddSubRegPseudo<RegisterClass regtype,
 class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
                      arith_shifted_reg shifted_regtype, string asm,
                      SDPatternOperator OpNode>
-    : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
-        asm, "\t$Rd, $Rn, $Rm", "",
-        [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>,
+    : I<(outs regtype:$Rd), (ins regtype:$Rn, (shifted_regtype $Rm, $shift):$Rm_and_shift),
+        asm, "\t$Rd, $Rn, $Rm_and_shift", "",
+        [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm_and_shift))]>,
       Sched<[WriteISReg, ReadI, ReadISReg]> {
-  // The operands are in order to match the 'addr' MI operands, so we
-  // don't need an encoder method and by-name matching. Just use the default
-  // in-order handling. Since we're using by-order, make sure the names
-  // do not match.
-  bits<5> dst;
-  bits<5> src1;
-  bits<5> src2;
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
   bits<8> shift;
   let Inst{30}    = isSub;
   let Inst{29}    = setFlags;
   let Inst{28-24} = 0b01011;
   let Inst{23-22} = shift{7-6};
   let Inst{21}    = 0;
-  let Inst{20-16} = src2;
+  let Inst{20-16} = Rm;
   let Inst{15-10} = shift{5-0};
-  let Inst{9-5}   = src1;
-  let Inst{4-0}   = dst;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
 
   let DecoderMethod = "DecodeThreeAddrSRegInstruction";
 }
@@ -2511,22 +2507,22 @@ class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
 class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype,
                      RegisterClass src1Regtype, Operand src2Regtype,
                      string asm, SDPatternOperator OpNode>
-    : I<(outs dstRegtype:$R1),
-        (ins src1Regtype:$R2, src2Regtype:$R3),
-        asm, "\t$R1, $R2, $R3", "",
-        [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>,
+    : I<(outs dstRegtype:$Rd),
+        (ins src1Regtype:$Rn, (src2Regtype $Rm, $extend):$Rm_and_extend),
+        asm, "\t$Rd, $Rn, $Rm_and_extend", "",
+        [(set dstRegtype:$Rd, (OpNode src1Regtype:$Rn, src2Regtype:$Rm_and_extend))]>,
       Sched<[WriteIEReg, ReadI, ReadIEReg]> {
   bits<5> Rd;
   bits<5> Rn;
   bits<5> Rm;
-  bits<6> ext;
+  bits<6> extend;
   let Inst{30}    = isSub;
   let Inst{29}    = setFlags;
   let Inst{28-24} = 0b01011;
   let Inst{23-21} = 0b001;
   let Inst{20-16} = Rm;
-  let Inst{15-13} = ext{5-3};
-  let Inst{12-10} = ext{2-0};
+  let Inst{15-13} = extend{5-3};
+  let Inst{12-10} = extend{2-0};
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Rd;
 
@@ -2913,25 +2909,21 @@ class BaseLogicalImm<bits<2> opc, RegisterClass dregtype,
 class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
                       logical_shifted_reg shifted_regtype, string asm,
                       list<dag> pattern>
-    : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
-        asm, "\t$Rd, $Rn, $Rm", "", pattern>,
+    : I<(outs regtype:$Rd), (ins regtype:$Rn, (shifted_regtype $Rm, $shift):$Rm_and_shift),
+        asm, "\t$Rd, $Rn, $Rm_and_shift", "", pattern>,
       Sched<[WriteISReg, ReadI, ReadISReg]> {
-  // The operands are in order to match the 'addr' MI operands, so we
-  // don't need an encoder method and by-name matching. Just use the default
-  // in-order handling. Since we're using by-order, make sure the names
-  // do not match.
-  bits<5> dst;
-  bits<5> src1;
-  bits<5> src2;
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
   bits<8> shift;
   let Inst{30-29} = opc;
   let Inst{28-24} = 0b01010;
   let Inst{23-22} = shift{7-6};
   let Inst{21}    = N;
-  let Inst{20-16} = src2;
+  let Inst{20-16} = Rm;
   let Inst{15-10} = shift{5-0};
-  let Inst{9-5}   = src1;
-  let Inst{4-0}   = dst;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rd;
 
   let DecoderMethod = "DecodeThreeAddrSRegInstruction";
 }
@@ -3002,12 +2994,12 @@ multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
 
   def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
                             [(set GPR32:$Rd, (OpNode GPR32:$Rn,
-                                                 logical_shifted_reg32:$Rm))]> {
+                                                 logical_shifted_reg32:$Rm_and_shift))]> {
     let Inst{31} = 0;
   }
   def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
                             [(set GPR64:$Rd, (OpNode GPR64:$Rn,
-                                                 logical_shifted_reg64:$Rm))]> {
+                                                 logical_shifted_reg64:$Rm_and_shift))]> {
     let Inst{31} = 1;
   }
 
@@ -3025,11 +3017,11 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
   def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
 
   def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
-            [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm))]> {
+            [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm_and_shift))]> {
     let Inst{31} = 0;
   }
   def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
-            [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm))]> {
+            [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm_and_shift))]> {
     let Inst{31} = 1;
   }
   } // Defs = [NZCV]

diff  --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c80c184bd77ae..f102e5e1c9792 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3896,7 +3896,7 @@ def t2TBH_JT : t2PseudoInst<(outs),
         (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
         Sched<[WriteBr]>;
 
-def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
+def t2TBB : T2I<(outs), (ins (addrmode_tbb $Rn, $Rm):$addr), IIC_Br,
                     "tbb", "\t$addr", []>, Sched<[WriteBrTbl]> {
   bits<4> Rn;
   bits<4> Rm;
@@ -3909,7 +3909,7 @@ def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
   let DecoderMethod = "DecodeThumbTableBranch";
 }
 
-def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br,
+def t2TBH : T2I<(outs), (ins (addrmode_tbh $Rn, $Rm):$addr), IIC_Br,
                    "tbh", "\t$addr", []>, Sched<[WriteBrTbl]> {
   bits<4> Rn;
   bits<4> Rm;

diff  --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index 004f94f18b76f..31b2eb95f58ee 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -105,7 +105,10 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
   // operand number. Non-matching operands are assumed to be in
   // order.
   unsigned OpIdx;
-  if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
+  std::pair<unsigned, unsigned> SubOp;
+  if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
+    OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
+  } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
     // Get the machine operand number for the indicated operand.
     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
     assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) &&
@@ -133,7 +136,7 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
 
     OpIdx = NumberedOp++;
   }
-  
+
   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
   std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName;
 

diff  --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp
index 1dbd8ae9c2a27..ce70e2dac30a5 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -67,6 +67,10 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
       ArgName = InDI->getArgNameStr(i-NumDefs);
     }
 
+    DagInit *SubArgDag = dyn_cast<DagInit>(ArgInit);
+    if (SubArgDag)
+      ArgInit = SubArgDag->getOperator();
+
     DefInit *Arg = dyn_cast<DefInit>(ArgInit);
     if (!Arg)
       PrintFatalError(R->getLoc(), "Illegal operand for the '" + R->getName() +
@@ -132,6 +136,36 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
                           Twine(i) +
                           " has the same name as a previous operand!");
 
+    if (SubArgDag) {
+      if (SubArgDag->getNumArgs() != NumOps) {
+        PrintFatalError(R->getLoc(), "In instruction '" + R->getName() +
+                                         "', operand #" + Twine(i) + " has " +
+                                         Twine(SubArgDag->getNumArgs()) +
+                                         " sub-arg names, expected " +
+                                         Twine(NumOps) + ".");
+      }
+
+      for (unsigned j = 0; j < NumOps; ++j) {
+        if (!isa<UnsetInit>(SubArgDag->getArg(j)))
+          PrintFatalError(R->getLoc(),
+                          "In instruction '" + R->getName() + "', operand #" +
+                              Twine(i) + " sub-arg #" + Twine(j) +
+                              " has unexpected operand (expected only $name).");
+
+        StringRef SubArgName = SubArgDag->getArgNameStr(j);
+        if (SubArgName.empty())
+          PrintFatalError(R->getLoc(), "In instruction '" + R->getName() +
+                                           "', operand #" + Twine(i) +
+                                           " has no name!");
+        if (!OperandNames.insert(std::string(SubArgName)).second)
+          PrintFatalError(R->getLoc(),
+                          "In instruction '" + R->getName() + "', operand #" +
+                              Twine(i) + " sub-arg #" + Twine(j) +
+                              " has the same name as a previous operand!");
+        SubOpAliases[SubArgName] = std::make_pair(MIOperandNo, j);
+      }
+    }
+
     OperandList.emplace_back(
         Rec, std::string(ArgName), std::string(PrintMethod),
         std::string(EncoderMethod), OperandNamespace + "::" + OperandType,
@@ -175,6 +209,17 @@ bool CGIOperandList::hasOperandNamed(StringRef Name, unsigned &OpIdx) const {
   return false;
 }
 
+bool CGIOperandList::hasSubOperandAlias(
+    StringRef Name, std::pair<unsigned, unsigned> &SubOp) const {
+  assert(!Name.empty() && "Cannot search for operand with no name!");
+  auto SubOpIter = SubOpAliases.find(Name);
+  if (SubOpIter != SubOpAliases.end()) {
+    SubOp = SubOpIter->second;
+    return true;
+  }
+  return false;
+}
+
 std::pair<unsigned,unsigned>
 CGIOperandList::ParseOperandName(StringRef Op, bool AllowWholeOp) {
   if (Op.empty() || Op[0] != '$')
@@ -195,7 +240,21 @@ CGIOperandList::ParseOperandName(StringRef Op, bool AllowWholeOp) {
     OpName = OpName.substr(0, DotIdx);
   }
 
-  unsigned OpIdx = getOperandNamed(OpName);
+  unsigned OpIdx;
+
+  if (std::pair<unsigned, unsigned> SubOp; hasSubOperandAlias(OpName, SubOp)) {
+    // Found a name for a piece of an operand, just return it directly.
+    if (!SubOpName.empty()) {
+      PrintFatalError(
+          TheDef->getLoc(),
+          TheDef->getName() +
+              ": Cannot use dotted suboperand name within suboperand '" +
+              OpName + "'");
+    }
+    return SubOp;
+  }
+
+  OpIdx = getOperandNamed(OpName);
 
   if (SubOpName.empty()) {  // If no suboperand name was specified:
     // If one was needed, throw.

diff  --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h
index 067ea3b3f4e70..07a1bd276b191 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/llvm/utils/TableGen/CodeGenInstruction.h
@@ -14,6 +14,7 @@
 #define LLVM_UTILS_TABLEGEN_CODEGENINSTRUCTION_H
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/MachineValueType.h"
 #include <cassert>
@@ -149,6 +150,9 @@ template <typename T> class ArrayRef;
     /// type (which is a record).
     std::vector<OperandInfo> OperandList;
 
+    /// SubOpAliases - List of alias names for suboperands.
+    StringMap<std::pair<unsigned, unsigned>> SubOpAliases;
+
     // Information gleaned from the operand list.
     bool isPredicable;
     bool hasOptionalDef;
@@ -179,6 +183,9 @@ template <typename T> class ArrayRef;
     /// operand. Otherwise, return false.
     bool hasOperandNamed(StringRef Name, unsigned &OpIdx) const;
 
+    bool hasSubOperandAlias(StringRef Name,
+                            std::pair<unsigned, unsigned> &SubOp) const;
+
     /// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar",
     /// where $foo is a whole operand and $foo.bar refers to a suboperand.
     /// This aborts if the name is invalid.  If AllowWholeOp is true, references

diff  --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 5b0990a914000..d75ee3492539e 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -2160,7 +2160,10 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
       // to interpret it.  As a first step, require the target to provide
       // callbacks for decoding register classes.
 
-      OperandInfo OpInfo = getOpInfo(cast<DefInit>(Op.first)->getDef());
+      Init *OpInit = Op.first;
+      if (DagInit *Dag = dyn_cast<DagInit>(OpInit))
+        OpInit = Dag->getOperator();
+      OperandInfo OpInfo = getOpInfo(cast<DefInit>(OpInit)->getDef());
 
       // Some bits of the operand may be required to be 1 depending on the
       // instruction's encoding. Collect those bits.