[PATCH] MIR Serialization: Start serializing machine operands by serializing physical register operands.
Duncan P. N. Exon Smith
dexonsmith at apple.com
Thu Jun 18 17:02:37 PDT 2015
> On 2015-Jun-18, at 15:30, Alex Lorenz <arphaman at gmail.com> wrote:
>
> Update this patch to use the new lexing approach that was introduces in the updated patch for http://reviews.llvm.org/D10521.
>
>
> REPOSITORY
> rL LLVM
>
> http://reviews.llvm.org/D10525
>
> Files:
> lib/CodeGen/MIRParser/MILexer.cpp
> lib/CodeGen/MIRParser/MILexer.h
> lib/CodeGen/MIRParser/MIParser.cpp
> lib/CodeGen/MIRPrinter.cpp
> test/CodeGen/MIR/X86/expected-machine-operand.mir
> test/CodeGen/MIR/X86/missing-comma.mir
> test/CodeGen/MIR/X86/named-registers.mir
> test/CodeGen/MIR/X86/unknown-register.mir
>
> EMAIL PREFERENCES
> http://reviews.llvm.org/settings/panel/emailpreferences/
> <D10525.27967.patch>
I have a few comments below. Also, I was hoping Ahmed (or someone else
with MIR-level familiarity) could comment on the case-insensitive
register names. Are we safer here than with instruction opcodes?
> Index: lib/CodeGen/MIRParser/MILexer.cpp
> ===================================================================
> --- lib/CodeGen/MIRParser/MILexer.cpp
> +++ lib/CodeGen/MIRParser/MILexer.cpp
> @@ -67,6 +67,33 @@
> return C;
> }
>
> +static Cursor lexPercent(Cursor C, MIToken &Token) {
> + auto Range = C;
> + C.advance(); // Skip '%'
> + while (isIdentifierChar(C.peek()))
> + C.advance();
> + Token = MIToken(MIToken::NamedRegister, Range.upto(C));
> + return C;
> +}
> +
> +static MIToken::TokenKind symbolToken(char C) {
> + switch (C) {
> + case ',':
> + return MIToken::comma;
> + case '=':
> + return MIToken::equal;
> + default:
> + return MIToken::Error;
> + }
> +}
> +
> +static Cursor lexSymbol(Cursor C, MIToken::TokenKind Kind, MIToken &Token) {
> + auto Range = C;
> + C.advance();
> + Token = MIToken(Kind, Range.upto(C));
> + return C;
> +}
> +
> MILexer::MILexer(StringRef Source) : CurrentSource(Source) {}
>
> StringRef MILexer::lexToken(MIToken &Token) {
> @@ -79,6 +106,11 @@
> auto Char = C.peek();
> if (isalpha(Char) || Char == '_')
> return lexIdentifier(C, Token).remaining();
> + if (Char == '%')
> + return lexPercent(C, Token).remaining();
> + MIToken::TokenKind Kind = symbolToken(Char);
> + if (Kind != MIToken::Error)
> + return lexSymbol(C, Kind, Token).remaining();
> error(C.location(), Twine("unexpected character '") + Twine(Char) + "'");
> return C.remaining();
> }
> Index: lib/CodeGen/MIRParser/MILexer.h
> ===================================================================
> --- lib/CodeGen/MIRParser/MILexer.h
> +++ lib/CodeGen/MIRParser/MILexer.h
> @@ -26,8 +26,13 @@
> Eof,
> Error,
>
> + // Tokens with no info.
> + comma,
> + equal,
> +
> // Identifier tokens
> - Identifier
> + Identifier,
> + NamedRegister
> };
>
> private:
> @@ -41,6 +46,8 @@
>
> bool isError() const { return Kind == Error; }
>
> + bool isRegister() const { return Kind == NamedRegister; }
> +
> bool is(TokenKind K) const { return Kind == K; }
>
> bool isNot(TokenKind K) const { return Kind != K; }
> Index: lib/CodeGen/MIRParser/MIParser.cpp
> ===================================================================
> --- lib/CodeGen/MIRParser/MIParser.cpp
> +++ lib/CodeGen/MIRParser/MIParser.cpp
> @@ -34,6 +34,8 @@
> MIToken Token;
> /// Maps from instruction names to op codes.
> StringMap<unsigned> Names2InstrOpCodes;
> + /// Maps from register names to registers.
> + StringMap<unsigned> Names2Regs;
>
> public:
> MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
> @@ -50,14 +52,26 @@
>
> MachineInstr *parse();
>
> + bool parseRegister(unsigned &Reg);
> +
> + bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
> +
> + bool parseMachineOperand(MachineOperand &Dest);
If you're not adding comments (I don't think you need them, since the
names and parameters are fairly clear), I think you should kill the
newlines between these functions.
> +
> private:
> void initNames2InstrOpCodes();
>
> /// Try to convert an instruction name to an opcode. Return true if the
> /// instruction name is invalid.
> bool parseInstrName(StringRef InstrName, unsigned &OpCode);
>
> bool parseInstruction(unsigned &OpCode);
> +
> + void initNames2Regs();
> +
> + /// Try to convert a register name to a register number. Return true if the
> + /// register name is invalid.
> + bool getRegisterByName(StringRef RegName, unsigned &Reg);
> };
>
> } // end anonymous namespace
> @@ -87,13 +101,61 @@
> MachineInstr *MIParser::parse() {
> lex();
>
> + // Parse any register operands before '='
> + // TODO: Allow parsing of multiple operands before '='
> + MachineOperand MO = MachineOperand::CreateImm(0);
> + SmallVector<MachineOperand, 8> Operands;
> + if (Token.isRegister()) {
> + if (parseRegisterOperand(MO, /*IsDef=*/true))
> + return nullptr;
> + Operands.push_back(MO);
> + if (Token.isNot(MIToken::equal)) {
> + error("expected '='");
> + return nullptr;
> + }
> + lex();
> + }
> +
> unsigned OpCode;
> if (Token.isError() || parseInstruction(OpCode))
> return nullptr;
>
> - // TODO: Parse the rest of instruction - machine operands, etc.
> + // TODO: Parse the instruction flags and memory operands.
> +
> + // Parse the remaining machine operands.
> + while (Token.isNot(MIToken::Eof)) {
> + if (parseMachineOperand(MO))
> + return nullptr;
> + Operands.push_back(MO);
> + if (Token.is(MIToken::Eof))
> + break;
> + if (Token.isNot(MIToken::comma)) {
> + error("expected ',' before the next machine operand");
> + return nullptr;
> + }
> + lex();
> + }
> +
> const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
> - auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());
> +
> + // Verify machine operands
Add a `.` at the end of the sentence.
> + if (!MCID.isVariadic()) {
> + for (size_t I = 0, E = Operands.size(); I < E; ++I) {
> + if (I < MCID.getNumOperands())
> + continue;
> + // Mark this register as implicit to prevent an assertion when it's added
> + // to an instruction. This is a temporary workaround until the implicit
> + // register flag can be parsed.
> + Operands[I].setImplicit();
> + }
> + }
> +
> + // TODO: Determine the implicit behaviour when implicit register flags are
> + // parsed.
> + auto *MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
> + for (const auto &Operand : Operands) {
> + MI->addOperand(MF, Operand);
> + }
Probably don't need the braces?
> return MI;
> }
>
> @@ -103,6 +165,46 @@
> StringRef InstrName = Token.stringValue();
> if (parseInstrName(InstrName, OpCode))
> return error(Twine("unknown machine instruction name '") + InstrName + "'");
> + lex();
> + return false;
> +}
> +
> +bool MIParser::parseRegister(unsigned &Reg) {
> + switch (Token.kind()) {
> + case MIToken::NamedRegister: {
> + StringRef Name = Token.stringValue().drop_front(1); // Drop the '%'
> + if (getRegisterByName(Name, Reg))
> + return error(Twine("unknown register name '") + Name + "'");
> + break;
> + }
> + // TODO: Parse other register kinds.
> + default:
> + llvm_unreachable("The current token should be a register");
> + }
> + return false;
> +}
> +
> +bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
> + unsigned Reg;
> + // TODO: Parse register flags
Add a `.`.
> + if (parseRegister(Reg))
> + return true;
> + lex();
> + // TODO: Parse subregister.
> + Dest = MachineOperand::CreateReg(Reg, IsDef);
> + return false;
> +}
> +
> +bool MIParser::parseMachineOperand(MachineOperand &Dest) {
> + switch (Token.kind()) {
> + case MIToken::NamedRegister:
> + return parseRegisterOperand(Dest);
> + case MIToken::Error:
> + return true;
> + default:
> + // TODO: parse the other machine operands.
> + return error("expected a machine operand");
> + }
> return false;
> }
>
> @@ -125,6 +227,24 @@
> return false;
> }
>
> +void MIParser::initNames2Regs() {
> + if (!Names2Regs.empty())
> + return;
> + const auto *TRI = MF.getSubtarget().getRegisterInfo();
> + assert(TRI && "Expected target register info");
> + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I)
> + Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I));
Is this safe for registers? (Ahmed?)
Anyway, we should at least check:
for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
bool WasInserted =
Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)).second;
(void)WasInserted;
assert(WasInserted && "Expected registers to be unique case-insensitively");
}
Or maybe there's somewhere we can put this check to catch it at tablegen
time.
> +}
> +
> +bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
> + initNames2Regs();
> + auto RegInfo = Names2Regs.find(RegName);
> + if (RegInfo == Names2Regs.end())
> + return true;
> + Reg = RegInfo->getValue();
> + return false;
> +}
> +
> MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction &MF,
> StringRef Src, SMDiagnostic &Error) {
> return MIParser(SM, MF, Error, Src).parse();
> Index: lib/CodeGen/MIRPrinter.cpp
> ===================================================================
> --- lib/CodeGen/MIRPrinter.cpp
> +++ lib/CodeGen/MIRPrinter.cpp
> @@ -50,6 +50,8 @@
> MIPrinter(raw_ostream &OS) : OS(OS) {}
>
> void print(const MachineInstr &MI);
> +
I don't think this newline adds any value.
> + void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
> };
>
> } // end anonymous namespace
> @@ -110,10 +112,57 @@
>
> void MIPrinter::print(const MachineInstr &MI) {
> const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
> + const auto *TRI = SubTarget.getRegisterInfo();
> + assert(TRI && "Expected target register info");
> const auto *TII = SubTarget.getInstrInfo();
>
> + unsigned I = 0, E = MI.getNumOperands();
> + for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
> + !MI.getOperand(I).isImplicit();
> + ++I) {
> + if (I)
> + OS << ", ";
> + print(MI.getOperand(I), TRI);
> + }
> +
> + if (I)
> + OS << " = ";
> OS << StringRef(TII->getName(MI.getOpcode())).lower();
> - // TODO: Print the instruction flags, machine operands, machine mem operands.
> + // TODO: Print the instruction flags, machine mem operands.
> + if (I < E)
> + OS << ' ';
> +
> + bool NeedComma = false;
> + for (; I < E; ++I) {
> + if (NeedComma)
> + OS << ", ";
> + print(MI.getOperand(I), TRI);
> + NeedComma = true;
> + }
> +}
> +
> +static void printReg(unsigned Reg, raw_ostream &OS,
> + const TargetRegisterInfo *TRI) {
> + // TODO: Print Stack Slots
> + // TODO: Print no register
> + // TODO: Print virtual registers
> + if (Reg < TRI->getNumRegs())
> + OS << '%' << StringRef(TRI->getName(Reg)).lower();
> + else
> + llvm_unreachable("Can't print this kind of register yet");
> +}
> +
> +void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
> + switch (Op.getType()) {
> + case MachineOperand::MO_Register:
> + // TODO: Print register flags.
> + printReg(Op.getReg(), OS, TRI);
> + // TODO: Print sub register.
> + break;
> + default:
> + // TODO: Print the other machine operands.
> + llvm_unreachable("Can't print this machine operand at the moment");
> + }
> }
>
> void llvm::printMIR(raw_ostream &OS, const Module &M) {
> Index: test/CodeGen/MIR/X86/expected-machine-operand.mir
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/MIR/X86/expected-machine-operand.mir
> @@ -0,0 +1,20 @@
> +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> +
> +--- |
> +
> + define i32 @foo() {
> + entry:
> + ret i32 0
> + }
> +
> +...
> +---
> +name: foo
> +body:
> + - name: entry
> + instructions:
> + # CHECK: 1:16: expected a machine operand
Just noticed this error doesn't give the correct line number, just a
local line number for the instruction. Why not? I guess I missed this
in the earlier instruction patches? (As long as it's fixable, I don't
think it's urgent to fix immediately, but it's kind of broken.)
> + - '%eax = xor32rr ='
> + - 'retq %eax'
> +...
> +
> Index: test/CodeGen/MIR/X86/missing-comma.mir
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/MIR/X86/missing-comma.mir
> @@ -0,0 +1,20 @@
> +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> +
> +--- |
> +
> + define i32 @foo() {
> + entry:
> + ret i32 0
> + }
> +
> +...
> +---
> +name: foo
> +body:
> + - name: entry
> + instructions:
> + # CHECK: 1:21: expected ',' before the next machine operand
> + - '%eax = xor32rr %eax %eflags'
> + - 'retq %eax'
> +...
> +
> Index: test/CodeGen/MIR/X86/named-registers.mir
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/MIR/X86/named-registers.mir
> @@ -0,0 +1,22 @@
> +# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
> +# This test ensures that the MIR parser parses X86 registers coorectly.
> +
> +--- |
> +
> + define i32 @foo() {
> + entry:
> + ret i32 0
> + }
> +
> +...
> +---
> +# CHECK: name: foo
> +name: foo
> +body:
> + - name: entry
> + instructions:
> + # CHECK: - '%eax = mov32r0
> + # CHECK-NEXT: - 'retq %eax
> + - '%eax = mov32r0'
> + - 'retq %eax'
> +...
> Index: test/CodeGen/MIR/X86/unknown-register.mir
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/MIR/X86/unknown-register.mir
> @@ -0,0 +1,21 @@
> +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
> +# This test ensures that an error is reported when an unknown register is
> +# encountered.
> +
> +--- |
> +
> + define i32 @foo() {
> + entry:
> + ret i32 0
> + }
> +
> +...
> +---
> +name: foo
> +body:
> + - name: entry
> + instructions:
> + # CHECK: 1:1: unknown register name 'xax'
> + - '%xax = mov32r0'
> + - 'retq %xax'
> +...
>
More information about the llvm-commits
mailing list