[PATCH] MIR Serialization: Start serializing machine operands by serializing physical register operands.

Thu Jun 18 17:06:35 PDT 2015

2015-06-18 17:02 GMT-07:00 Duncan P. N. Exon Smith <dexonsmith at apple.com>:

>
> > On 2015-Jun-18, at 15:30, Alex Lorenz <arphaman at gmail.com> wrote:
> >
> > Update this patch to use the new lexing approach that was introduces in
> the updated patch for http://reviews.llvm.org/D10521.
> >
> >
> > REPOSITORY
> >  rL LLVM
> >
> > http://reviews.llvm.org/D10525
> >
> > Files:
> >  lib/CodeGen/MIRParser/MILexer.cpp
> >  lib/CodeGen/MIRParser/MILexer.h
> >  lib/CodeGen/MIRParser/MIParser.cpp
> >  lib/CodeGen/MIRPrinter.cpp
> >  test/CodeGen/MIR/X86/expected-machine-operand.mir
> >  test/CodeGen/MIR/X86/missing-comma.mir
> >  test/CodeGen/MIR/X86/named-registers.mir
> >  test/CodeGen/MIR/X86/unknown-register.mir
> >
> > EMAIL PREFERENCES
> >  http://reviews.llvm.org/settings/panel/emailpreferences/
> > <D10525.27967.patch>
>
> I have a few comments below.  Also, I was hoping Ahmed (or someone else
> with MIR-level familiarity) could comment on the case-insensitive
> register names.  Are we safer here than with instruction opcodes?
>
> > Index: lib/CodeGen/MIRParser/MILexer.cpp
> > ===================================================================
> > --- lib/CodeGen/MIRParser/MILexer.cpp
> > +++ lib/CodeGen/MIRParser/MILexer.cpp
> > @@ -67,6 +67,33 @@
> >    return C;
> >  }
> >
> > +static Cursor lexPercent(Cursor C, MIToken &Token) {
> > +  auto Range = C;
> > +  C.advance(); // Skip '%'
> > +  while (isIdentifierChar(C.peek()))
> > +    C.advance();
> > +  Token = MIToken(MIToken::NamedRegister, Range.upto(C));
> > +  return C;
> > +}
> > +
> > +static MIToken::TokenKind symbolToken(char C) {
> > +  switch (C) {
> > +  case ',':
> > +    return MIToken::comma;
> > +  case '=':
> > +    return MIToken::equal;
> > +  default:
> > +    return MIToken::Error;
> > +  }
> > +}
> > +
> > +static Cursor lexSymbol(Cursor C, MIToken::TokenKind Kind, MIToken
> &Token) {
> > +  auto Range = C;
> > +  C.advance();
> > +  Token = MIToken(Kind, Range.upto(C));
> > +  return C;
> > +}
> > +
> >  MILexer::MILexer(StringRef Source) : CurrentSource(Source) {}
> >
> >  StringRef MILexer::lexToken(MIToken &Token) {
> > @@ -79,6 +106,11 @@
> >    auto Char = C.peek();
> >    if (isalpha(Char) || Char == '_')
> >      return lexIdentifier(C, Token).remaining();
> > +  if (Char == '%')
> > +    return lexPercent(C, Token).remaining();
> > +  MIToken::TokenKind Kind = symbolToken(Char);
> > +  if (Kind != MIToken::Error)
> > +    return lexSymbol(C, Kind, Token).remaining();
> >    error(C.location(), Twine("unexpected character '") + Twine(Char) +
> "'");
> >    return C.remaining();
> >  }
> > Index: lib/CodeGen/MIRParser/MILexer.h
> > ===================================================================
> > --- lib/CodeGen/MIRParser/MILexer.h
> > +++ lib/CodeGen/MIRParser/MILexer.h
> > @@ -26,8 +26,13 @@
> >      Eof,
> >      Error,
> >
> > +    // Tokens with no info.
> > +    comma,
> > +    equal,
> > +
> >      // Identifier tokens
> > -    Identifier
> > +    Identifier,
> > +    NamedRegister
> >    };
> >
> >  private:
> > @@ -41,6 +46,8 @@
> >
> >    bool isError() const { return Kind == Error; }
> >
> > +  bool isRegister() const { return Kind == NamedRegister; }
> > +
> >    bool is(TokenKind K) const { return Kind == K; }
> >
> >    bool isNot(TokenKind K) const { return Kind != K; }
> > Index: lib/CodeGen/MIRParser/MIParser.cpp
> > ===================================================================
> > --- lib/CodeGen/MIRParser/MIParser.cpp
> > +++ lib/CodeGen/MIRParser/MIParser.cpp
> > @@ -34,6 +34,8 @@
> >    MIToken Token;
> >    /// Maps from instruction names to op codes.
> >    StringMap<unsigned> Names2InstrOpCodes;
> > +  /// Maps from register names to registers.
> > +  StringMap<unsigned> Names2Regs;
> >
> >  public:
> >    MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
> > @@ -50,14 +52,26 @@
> >
> >    MachineInstr *parse();
> >
> > +  bool parseRegister(unsigned &Reg);
> > +
> > +  bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
> > +
> > +  bool parseMachineOperand(MachineOperand &Dest);
>
> If you're not adding comments (I don't think you need them, since the
> names and parameters are fairly clear), I think you should kill the
> newlines between these functions.
>
> > +
> >  private:
> >    void initNames2InstrOpCodes();
> >
> >    /// Try to convert an instruction name to an opcode. Return true if
> the
> >    /// instruction name is invalid.
> >    bool parseInstrName(StringRef InstrName, unsigned &OpCode);
> >
> >    bool parseInstruction(unsigned &OpCode);
> > +
> > +  void initNames2Regs();
> > +
> > +  /// Try to convert a register name to a register number. Return true
> if the
> > +  /// register name is invalid.
> > +  bool getRegisterByName(StringRef RegName, unsigned &Reg);
> >  };
> >
> >  } // end anonymous namespace
> > @@ -87,13 +101,61 @@
> >  MachineInstr *MIParser::parse() {
> >    lex();
> >
> > +  // Parse any register operands before '='
> > +  // TODO: Allow parsing of multiple operands before '='
> > +  MachineOperand MO = MachineOperand::CreateImm(0);
> > +  SmallVector<MachineOperand, 8> Operands;
> > +  if (Token.isRegister()) {
> > +    if (parseRegisterOperand(MO, /*IsDef=*/true))
> > +      return nullptr;
> > +    Operands.push_back(MO);
> > +    if (Token.isNot(MIToken::equal)) {
> > +      error("expected '='");
> > +      return nullptr;
> > +    }
> > +    lex();
> > +  }
> > +
> >    unsigned OpCode;
> >    if (Token.isError() || parseInstruction(OpCode))
> >      return nullptr;
> >
> > -  // TODO: Parse the rest of instruction - machine operands, etc.
> > +  // TODO: Parse the instruction flags and memory operands.
> > +
> > +  // Parse the remaining machine operands.
> > +  while (Token.isNot(MIToken::Eof)) {
> > +    if (parseMachineOperand(MO))
> > +      return nullptr;
> > +    Operands.push_back(MO);
> > +    if (Token.is(MIToken::Eof))
> > +      break;
> > +    if (Token.isNot(MIToken::comma)) {
> > +      error("expected ',' before the next machine operand");
> > +      return nullptr;
> > +    }
> > +    lex();
> > +  }
> > +
> >    const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
> > -  auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());
> > +
> > +  // Verify machine operands
>
> Add a `.` at the end of the sentence.
>
> > +  if (!MCID.isVariadic()) {
> > +    for (size_t I = 0, E = Operands.size(); I < E; ++I) {
> > +      if (I < MCID.getNumOperands())
> > +        continue;
> > +      // Mark this register as implicit to prevent an assertion when
> it's added
> > +      // to an instruction. This is a temporary workaround until the
> implicit
> > +      // register flag can be parsed.
> > +      Operands[I].setImplicit();
> > +    }
> > +  }
> > +
> > +  // TODO: Determine the implicit behaviour when implicit register
> flags are
> > +  // parsed.
> > +  auto *MI = MF.CreateMachineInstr(MCID, DebugLoc(),
> /*NoImplicit=*/true);
> > +  for (const auto &Operand : Operands) {
> > +    MI->addOperand(MF, Operand);
> > +  }
>
> Probably don't need the braces?
>
> >    return MI;
> >  }
> >
> > @@ -103,6 +165,46 @@
> >    StringRef InstrName = Token.stringValue();
> >    if (parseInstrName(InstrName, OpCode))
> >      return error(Twine("unknown machine instruction name '") +
> InstrName + "'");
> > +  lex();
> > +  return false;
> > +}
> > +
> > +bool MIParser::parseRegister(unsigned &Reg) {
> > +  switch (Token.kind()) {
> > +  case MIToken::NamedRegister: {
> > +    StringRef Name = Token.stringValue().drop_front(1); // Drop the '%'
> > +    if (getRegisterByName(Name, Reg))
> > +      return error(Twine("unknown register name '") + Name + "'");
> > +    break;
> > +  }
> > +  // TODO: Parse other register kinds.
> > +  default:
> > +    llvm_unreachable("The current token should be a register");
> > +  }
> > +  return false;
> > +}
> > +
> > +bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
> > +  unsigned Reg;
> > +  // TODO: Parse register flags
>
> Add a `.`.
>
> > +  if (parseRegister(Reg))
> > +    return true;
> > +  lex();
> > +  // TODO: Parse subregister.
> > +  Dest = MachineOperand::CreateReg(Reg, IsDef);
> > +  return false;
> > +}
> > +
> > +bool MIParser::parseMachineOperand(MachineOperand &Dest) {
> > +  switch (Token.kind()) {
> > +  case MIToken::NamedRegister:
> > +    return parseRegisterOperand(Dest);
> > +  case MIToken::Error:
> > +    return true;
> > +  default:
> > +    // TODO: parse the other machine operands.
> > +    return error("expected a machine operand");
> > +  }
> >    return false;
> >  }
> >
> > @@ -125,6 +227,24 @@
> >    return false;
> >  }
> >
> > +void MIParser::initNames2Regs() {
> > +  if (!Names2Regs.empty())
> > +    return;
> > +  const auto *TRI = MF.getSubtarget().getRegisterInfo();
> > +  assert(TRI && "Expected target register info");
> > +  for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I)
> > +
> Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I));
>
> Is this safe for registers?  (Ahmed?)
>
> Anyway, we should at least check:
>
>     for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
>       bool WasInserted =
>
> Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(),
> I)).second;
>       (void)WasInserted;
>       assert(WasInserted && "Expected registers to be unique
> case-insensitively");
>     }
>
> Or maybe there's somewhere we can put this check to catch it at tablegen
> time.
>
> > +}
> > +
> > +bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
> > +  initNames2Regs();
> > +  auto RegInfo = Names2Regs.find(RegName);
> > +  if (RegInfo == Names2Regs.end())
> > +    return true;
> > +  Reg = RegInfo->getValue();
> > +  return false;
> > +}
> > +
> >  MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction
> &MF,
> >                                        StringRef Src, SMDiagnostic
> &Error) {
> >    return MIParser(SM, MF, Error, Src).parse();
> > Index: lib/CodeGen/MIRPrinter.cpp
> > ===================================================================
> > --- lib/CodeGen/MIRPrinter.cpp
> > +++ lib/CodeGen/MIRPrinter.cpp
> > @@ -50,6 +50,8 @@
> >    MIPrinter(raw_ostream &OS) : OS(OS) {}
> >
> >    void print(const MachineInstr &MI);
> > +
>
> I don't think this newline adds any value.
>
> > +  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
> >  };
> >
> >  } // end anonymous namespace
> > @@ -110,10 +112,57 @@
> >
> >  void MIPrinter::print(const MachineInstr &MI) {
> >    const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
> > +  const auto *TRI = SubTarget.getRegisterInfo();
> > +  assert(TRI && "Expected target register info");
> >    const auto *TII = SubTarget.getInstrInfo();
> >
> > +  unsigned I = 0, E = MI.getNumOperands();
> > +  for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef()
> &&
> > +         !MI.getOperand(I).isImplicit();
> > +       ++I) {
> > +    if (I)
> > +      OS << ", ";
> > +    print(MI.getOperand(I), TRI);
> > +  }
> > +
> > +  if (I)
> > +    OS << " = ";
> >    OS << StringRef(TII->getName(MI.getOpcode())).lower();
> > -  // TODO: Print the instruction flags, machine operands, machine mem
> operands.
> > +  // TODO: Print the instruction flags, machine mem operands.
> > +  if (I < E)
> > +    OS << ' ';
> > +
> > +  bool NeedComma = false;
> > +  for (; I < E; ++I) {
> > +    if (NeedComma)
> > +      OS << ", ";
> > +    print(MI.getOperand(I), TRI);
> > +    NeedComma = true;
> > +  }
> > +}
> > +
> > +static void printReg(unsigned Reg, raw_ostream &OS,
> > +                     const TargetRegisterInfo *TRI) {
> > +  // TODO: Print Stack Slots
> > +  // TODO: Print no register
> > +  // TODO: Print virtual registers
> > +  if (Reg < TRI->getNumRegs())
> > +    OS << '%' << StringRef(TRI->getName(Reg)).lower();
> > +  else
> > +    llvm_unreachable("Can't print this kind of register yet");
> > +}
> > +
> > +void MIPrinter::print(const MachineOperand &Op, const
> TargetRegisterInfo *TRI) {
> > +  switch (Op.getType()) {
> > +  case MachineOperand::MO_Register:
> > +    // TODO: Print register flags.
> > +    printReg(Op.getReg(), OS, TRI);
> > +    // TODO: Print sub register.
> > +    break;
> > +  default:
> > +    // TODO: Print the other machine operands.
> > +    llvm_unreachable("Can't print this machine operand at the moment");
> > +  }
> >  }
> >
> >  void llvm::printMIR(raw_ostream &OS, const Module &M) {
> > Index: test/CodeGen/MIR/X86/expected-machine-operand.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/expected-machine-operand.mir
> > @@ -0,0 +1,20 @@
> > +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> > +
> > +--- |
> > +
> > +  define i32 @foo() {
> > +  entry:
> > +    ret i32 0
> > +  }
> > +
> > +...
> > +---
> > +name:            foo
> > +body:
> > + - name:         entry
> > +   instructions:
> > +     # CHECK: 1:16: expected a machine operand
>
> Just noticed this error doesn't give the correct line number, just a
> local line number for the instruction.  Why not?  I guess I missed this
> in the earlier instruction patches?  (As long as it's fixable, I don't
> think it's urgent to fix immediately, but it's kind of broken.)
>

There's a TODO for this in the previous lexing patch, the MI string error
locations aren't translated into the full file locations yet. There will be
a separate patch that will fix this.

>
> > +     - '%eax = xor32rr ='
> > +     - 'retq %eax'
> > +...
> > +
> > Index: test/CodeGen/MIR/X86/missing-comma.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/missing-comma.mir
> > @@ -0,0 +1,20 @@
> > +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> > +
> > +--- |
> > +
> > +  define i32 @foo() {
> > +  entry:
> > +    ret i32 0
> > +  }
> > +
> > +...
> > +---
> > +name:            foo
> > +body:
> > + - name:         entry
> > +   instructions:
> > +     # CHECK: 1:21: expected ',' before the next machine operand
> > +     - '%eax = xor32rr %eax %eflags'
> > +     - 'retq %eax'
> > +...
> > +
> > Index: test/CodeGen/MIR/X86/named-registers.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/named-registers.mir
> > @@ -0,0 +1,22 @@
> > +# RUN: llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s | FileCheck %s
> > +# This test ensures that the MIR parser parses X86 registers coorectly.
> > +
> > +--- |
> > +
> > +  define i32 @foo() {
> > +  entry:
> > +    ret i32 0
> > +  }
> > +
> > +...
> > +---
> > +# CHECK: name: foo
> > +name:            foo
> > +body:
> > + - name:         entry
> > +   instructions:
> > +     # CHECK:      - '%eax = mov32r0
> > +     # CHECK-NEXT: - 'retq %eax
> > +     - '%eax = mov32r0'
> > +     - 'retq %eax'
> > +...
> > Index: test/CodeGen/MIR/X86/unknown-register.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/unknown-register.mir
> > @@ -0,0 +1,21 @@
> > +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> > +# This test ensures that an error is reported when an unknown register
> is
> > +# encountered.
> > +
> > +--- |
> > +
> > +  define i32 @foo() {
> > +  entry:
> > +    ret i32 0
> > +  }
> > +
> > +...
> > +---
> > +name:            foo
> > +body:
> > + - name:         entry
> > +   instructions:
> > +     # CHECK: 1:1: unknown register name 'xax'
> > +     - '%xax = mov32r0'
> > +     - 'retq %xax'
> > +...
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150618/c61f916a/attachment.html>