[PATCH] MIR Serialization: Start serializing machine operands by serializing physical register operands.
Alex L
arphaman at gmail.com
Thu Jun 18 17:06:35 PDT 2015
2015-06-18 17:02 GMT-07:00 Duncan P. N. Exon Smith <dexonsmith at apple.com>:
>
> > On 2015-Jun-18, at 15:30, Alex Lorenz <arphaman at gmail.com> wrote:
> >
> > Update this patch to use the new lexing approach that was introduces in
> the updated patch for http://reviews.llvm.org/D10521.
> >
> >
> > REPOSITORY
> > rL LLVM
> >
> > http://reviews.llvm.org/D10525
> >
> > Files:
> > lib/CodeGen/MIRParser/MILexer.cpp
> > lib/CodeGen/MIRParser/MILexer.h
> > lib/CodeGen/MIRParser/MIParser.cpp
> > lib/CodeGen/MIRPrinter.cpp
> > test/CodeGen/MIR/X86/expected-machine-operand.mir
> > test/CodeGen/MIR/X86/missing-comma.mir
> > test/CodeGen/MIR/X86/named-registers.mir
> > test/CodeGen/MIR/X86/unknown-register.mir
> >
> > EMAIL PREFERENCES
> > http://reviews.llvm.org/settings/panel/emailpreferences/
> > <D10525.27967.patch>
>
> I have a few comments below. Also, I was hoping Ahmed (or someone else
> with MIR-level familiarity) could comment on the case-insensitive
> register names. Are we safer here than with instruction opcodes?
>
> > Index: lib/CodeGen/MIRParser/MILexer.cpp
> > ===================================================================
> > --- lib/CodeGen/MIRParser/MILexer.cpp
> > +++ lib/CodeGen/MIRParser/MILexer.cpp
> > @@ -67,6 +67,33 @@
> > return C;
> > }
> >
> > +static Cursor lexPercent(Cursor C, MIToken &Token) {
> > + auto Range = C;
> > + C.advance(); // Skip '%'
> > + while (isIdentifierChar(C.peek()))
> > + C.advance();
> > + Token = MIToken(MIToken::NamedRegister, Range.upto(C));
> > + return C;
> > +}
> > +
> > +static MIToken::TokenKind symbolToken(char C) {
> > + switch (C) {
> > + case ',':
> > + return MIToken::comma;
> > + case '=':
> > + return MIToken::equal;
> > + default:
> > + return MIToken::Error;
> > + }
> > +}
> > +
> > +static Cursor lexSymbol(Cursor C, MIToken::TokenKind Kind, MIToken
> &Token) {
> > + auto Range = C;
> > + C.advance();
> > + Token = MIToken(Kind, Range.upto(C));
> > + return C;
> > +}
> > +
> > MILexer::MILexer(StringRef Source) : CurrentSource(Source) {}
> >
> > StringRef MILexer::lexToken(MIToken &Token) {
> > @@ -79,6 +106,11 @@
> > auto Char = C.peek();
> > if (isalpha(Char) || Char == '_')
> > return lexIdentifier(C, Token).remaining();
> > + if (Char == '%')
> > + return lexPercent(C, Token).remaining();
> > + MIToken::TokenKind Kind = symbolToken(Char);
> > + if (Kind != MIToken::Error)
> > + return lexSymbol(C, Kind, Token).remaining();
> > error(C.location(), Twine("unexpected character '") + Twine(Char) +
> "'");
> > return C.remaining();
> > }
> > Index: lib/CodeGen/MIRParser/MILexer.h
> > ===================================================================
> > --- lib/CodeGen/MIRParser/MILexer.h
> > +++ lib/CodeGen/MIRParser/MILexer.h
> > @@ -26,8 +26,13 @@
> > Eof,
> > Error,
> >
> > + // Tokens with no info.
> > + comma,
> > + equal,
> > +
> > // Identifier tokens
> > - Identifier
> > + Identifier,
> > + NamedRegister
> > };
> >
> > private:
> > @@ -41,6 +46,8 @@
> >
> > bool isError() const { return Kind == Error; }
> >
> > + bool isRegister() const { return Kind == NamedRegister; }
> > +
> > bool is(TokenKind K) const { return Kind == K; }
> >
> > bool isNot(TokenKind K) const { return Kind != K; }
> > Index: lib/CodeGen/MIRParser/MIParser.cpp
> > ===================================================================
> > --- lib/CodeGen/MIRParser/MIParser.cpp
> > +++ lib/CodeGen/MIRParser/MIParser.cpp
> > @@ -34,6 +34,8 @@
> > MIToken Token;
> > /// Maps from instruction names to op codes.
> > StringMap<unsigned> Names2InstrOpCodes;
> > + /// Maps from register names to registers.
> > + StringMap<unsigned> Names2Regs;
> >
> > public:
> > MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
> > @@ -50,14 +52,26 @@
> >
> > MachineInstr *parse();
> >
> > + bool parseRegister(unsigned &Reg);
> > +
> > + bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
> > +
> > + bool parseMachineOperand(MachineOperand &Dest);
>
> If you're not adding comments (I don't think you need them, since the
> names and parameters are fairly clear), I think you should kill the
> newlines between these functions.
>
> > +
> > private:
> > void initNames2InstrOpCodes();
> >
> > /// Try to convert an instruction name to an opcode. Return true if
> the
> > /// instruction name is invalid.
> > bool parseInstrName(StringRef InstrName, unsigned &OpCode);
> >
> > bool parseInstruction(unsigned &OpCode);
> > +
> > + void initNames2Regs();
> > +
> > + /// Try to convert a register name to a register number. Return true
> if the
> > + /// register name is invalid.
> > + bool getRegisterByName(StringRef RegName, unsigned &Reg);
> > };
> >
> > } // end anonymous namespace
> > @@ -87,13 +101,61 @@
> > MachineInstr *MIParser::parse() {
> > lex();
> >
> > + // Parse any register operands before '='
> > + // TODO: Allow parsing of multiple operands before '='
> > + MachineOperand MO = MachineOperand::CreateImm(0);
> > + SmallVector<MachineOperand, 8> Operands;
> > + if (Token.isRegister()) {
> > + if (parseRegisterOperand(MO, /*IsDef=*/true))
> > + return nullptr;
> > + Operands.push_back(MO);
> > + if (Token.isNot(MIToken::equal)) {
> > + error("expected '='");
> > + return nullptr;
> > + }
> > + lex();
> > + }
> > +
> > unsigned OpCode;
> > if (Token.isError() || parseInstruction(OpCode))
> > return nullptr;
> >
> > - // TODO: Parse the rest of instruction - machine operands, etc.
> > + // TODO: Parse the instruction flags and memory operands.
> > +
> > + // Parse the remaining machine operands.
> > + while (Token.isNot(MIToken::Eof)) {
> > + if (parseMachineOperand(MO))
> > + return nullptr;
> > + Operands.push_back(MO);
> > + if (Token.is(MIToken::Eof))
> > + break;
> > + if (Token.isNot(MIToken::comma)) {
> > + error("expected ',' before the next machine operand");
> > + return nullptr;
> > + }
> > + lex();
> > + }
> > +
> > const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
> > - auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());
> > +
> > + // Verify machine operands
>
> Add a `.` at the end of the sentence.
>
> > + if (!MCID.isVariadic()) {
> > + for (size_t I = 0, E = Operands.size(); I < E; ++I) {
> > + if (I < MCID.getNumOperands())
> > + continue;
> > + // Mark this register as implicit to prevent an assertion when
> it's added
> > + // to an instruction. This is a temporary workaround until the
> implicit
> > + // register flag can be parsed.
> > + Operands[I].setImplicit();
> > + }
> > + }
> > +
> > + // TODO: Determine the implicit behaviour when implicit register
> flags are
> > + // parsed.
> > + auto *MI = MF.CreateMachineInstr(MCID, DebugLoc(),
> /*NoImplicit=*/true);
> > + for (const auto &Operand : Operands) {
> > + MI->addOperand(MF, Operand);
> > + }
>
> Probably don't need the braces?
>
> > return MI;
> > }
> >
> > @@ -103,6 +165,46 @@
> > StringRef InstrName = Token.stringValue();
> > if (parseInstrName(InstrName, OpCode))
> > return error(Twine("unknown machine instruction name '") +
> InstrName + "'");
> > + lex();
> > + return false;
> > +}
> > +
> > +bool MIParser::parseRegister(unsigned &Reg) {
> > + switch (Token.kind()) {
> > + case MIToken::NamedRegister: {
> > + StringRef Name = Token.stringValue().drop_front(1); // Drop the '%'
> > + if (getRegisterByName(Name, Reg))
> > + return error(Twine("unknown register name '") + Name + "'");
> > + break;
> > + }
> > + // TODO: Parse other register kinds.
> > + default:
> > + llvm_unreachable("The current token should be a register");
> > + }
> > + return false;
> > +}
> > +
> > +bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
> > + unsigned Reg;
> > + // TODO: Parse register flags
>
> Add a `.`.
>
> > + if (parseRegister(Reg))
> > + return true;
> > + lex();
> > + // TODO: Parse subregister.
> > + Dest = MachineOperand::CreateReg(Reg, IsDef);
> > + return false;
> > +}
> > +
> > +bool MIParser::parseMachineOperand(MachineOperand &Dest) {
> > + switch (Token.kind()) {
> > + case MIToken::NamedRegister:
> > + return parseRegisterOperand(Dest);
> > + case MIToken::Error:
> > + return true;
> > + default:
> > + // TODO: parse the other machine operands.
> > + return error("expected a machine operand");
> > + }
> > return false;
> > }
> >
> > @@ -125,6 +227,24 @@
> > return false;
> > }
> >
> > +void MIParser::initNames2Regs() {
> > + if (!Names2Regs.empty())
> > + return;
> > + const auto *TRI = MF.getSubtarget().getRegisterInfo();
> > + assert(TRI && "Expected target register info");
> > + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I)
> > +
> Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I));
>
> Is this safe for registers? (Ahmed?)
>
> Anyway, we should at least check:
>
> for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
> bool WasInserted =
>
> Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(),
> I)).second;
> (void)WasInserted;
> assert(WasInserted && "Expected registers to be unique
> case-insensitively");
> }
>
> Or maybe there's somewhere we can put this check to catch it at tablegen
> time.
>
> > +}
> > +
> > +bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
> > + initNames2Regs();
> > + auto RegInfo = Names2Regs.find(RegName);
> > + if (RegInfo == Names2Regs.end())
> > + return true;
> > + Reg = RegInfo->getValue();
> > + return false;
> > +}
> > +
> > MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction
> &MF,
> > StringRef Src, SMDiagnostic
> &Error) {
> > return MIParser(SM, MF, Error, Src).parse();
> > Index: lib/CodeGen/MIRPrinter.cpp
> > ===================================================================
> > --- lib/CodeGen/MIRPrinter.cpp
> > +++ lib/CodeGen/MIRPrinter.cpp
> > @@ -50,6 +50,8 @@
> > MIPrinter(raw_ostream &OS) : OS(OS) {}
> >
> > void print(const MachineInstr &MI);
> > +
>
> I don't think this newline adds any value.
>
> > + void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
> > };
> >
> > } // end anonymous namespace
> > @@ -110,10 +112,57 @@
> >
> > void MIPrinter::print(const MachineInstr &MI) {
> > const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
> > + const auto *TRI = SubTarget.getRegisterInfo();
> > + assert(TRI && "Expected target register info");
> > const auto *TII = SubTarget.getInstrInfo();
> >
> > + unsigned I = 0, E = MI.getNumOperands();
> > + for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef()
> &&
> > + !MI.getOperand(I).isImplicit();
> > + ++I) {
> > + if (I)
> > + OS << ", ";
> > + print(MI.getOperand(I), TRI);
> > + }
> > +
> > + if (I)
> > + OS << " = ";
> > OS << StringRef(TII->getName(MI.getOpcode())).lower();
> > - // TODO: Print the instruction flags, machine operands, machine mem
> operands.
> > + // TODO: Print the instruction flags, machine mem operands.
> > + if (I < E)
> > + OS << ' ';
> > +
> > + bool NeedComma = false;
> > + for (; I < E; ++I) {
> > + if (NeedComma)
> > + OS << ", ";
> > + print(MI.getOperand(I), TRI);
> > + NeedComma = true;
> > + }
> > +}
> > +
> > +static void printReg(unsigned Reg, raw_ostream &OS,
> > + const TargetRegisterInfo *TRI) {
> > + // TODO: Print Stack Slots
> > + // TODO: Print no register
> > + // TODO: Print virtual registers
> > + if (Reg < TRI->getNumRegs())
> > + OS << '%' << StringRef(TRI->getName(Reg)).lower();
> > + else
> > + llvm_unreachable("Can't print this kind of register yet");
> > +}
> > +
> > +void MIPrinter::print(const MachineOperand &Op, const
> TargetRegisterInfo *TRI) {
> > + switch (Op.getType()) {
> > + case MachineOperand::MO_Register:
> > + // TODO: Print register flags.
> > + printReg(Op.getReg(), OS, TRI);
> > + // TODO: Print sub register.
> > + break;
> > + default:
> > + // TODO: Print the other machine operands.
> > + llvm_unreachable("Can't print this machine operand at the moment");
> > + }
> > }
> >
> > void llvm::printMIR(raw_ostream &OS, const Module &M) {
> > Index: test/CodeGen/MIR/X86/expected-machine-operand.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/expected-machine-operand.mir
> > @@ -0,0 +1,20 @@
> > +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> > +
> > +--- |
> > +
> > + define i32 @foo() {
> > + entry:
> > + ret i32 0
> > + }
> > +
> > +...
> > +---
> > +name: foo
> > +body:
> > + - name: entry
> > + instructions:
> > + # CHECK: 1:16: expected a machine operand
>
> Just noticed this error doesn't give the correct line number, just a
> local line number for the instruction. Why not? I guess I missed this
> in the earlier instruction patches? (As long as it's fixable, I don't
> think it's urgent to fix immediately, but it's kind of broken.)
>
There's a TODO for this in the previous lexing patch, the MI string error
locations aren't translated into the full file locations yet. There will be
a separate patch that will fix this.
>
> > + - '%eax = xor32rr ='
> > + - 'retq %eax'
> > +...
> > +
> > Index: test/CodeGen/MIR/X86/missing-comma.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/missing-comma.mir
> > @@ -0,0 +1,20 @@
> > +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> > +
> > +--- |
> > +
> > + define i32 @foo() {
> > + entry:
> > + ret i32 0
> > + }
> > +
> > +...
> > +---
> > +name: foo
> > +body:
> > + - name: entry
> > + instructions:
> > + # CHECK: 1:21: expected ',' before the next machine operand
> > + - '%eax = xor32rr %eax %eflags'
> > + - 'retq %eax'
> > +...
> > +
> > Index: test/CodeGen/MIR/X86/named-registers.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/named-registers.mir
> > @@ -0,0 +1,22 @@
> > +# RUN: llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s | FileCheck %s
> > +# This test ensures that the MIR parser parses X86 registers coorectly.
> > +
> > +--- |
> > +
> > + define i32 @foo() {
> > + entry:
> > + ret i32 0
> > + }
> > +
> > +...
> > +---
> > +# CHECK: name: foo
> > +name: foo
> > +body:
> > + - name: entry
> > + instructions:
> > + # CHECK: - '%eax = mov32r0
> > + # CHECK-NEXT: - 'retq %eax
> > + - '%eax = mov32r0'
> > + - 'retq %eax'
> > +...
> > Index: test/CodeGen/MIR/X86/unknown-register.mir
> > ===================================================================
> > --- /dev/null
> > +++ test/CodeGen/MIR/X86/unknown-register.mir
> > @@ -0,0 +1,21 @@
> > +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after
> branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> > +# This test ensures that an error is reported when an unknown register
> is
> > +# encountered.
> > +
> > +--- |
> > +
> > + define i32 @foo() {
> > + entry:
> > + ret i32 0
> > + }
> > +
> > +...
> > +---
> > +name: foo
> > +body:
> > + - name: entry
> > + instructions:
> > + # CHECK: 1:1: unknown register name 'xax'
> > + - '%xax = mov32r0'
> > + - 'retq %xax'
> > +...
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150618/c61f916a/attachment.html>
More information about the llvm-commits
mailing list