<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">2015-06-18 17:02 GMT-07:00 Duncan P. N. Exon Smith <span dir="ltr"><<a href="mailto:dexonsmith@apple.com" target="_blank">dexonsmith@apple.com</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5"><br>
> On 2015-Jun-18, at 15:30, Alex Lorenz <<a href="mailto:arphaman@gmail.com">arphaman@gmail.com</a>> wrote:<br>
><br>
> Update this patch to use the new lexing approach that was introduces in the updated patch for <a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__reviews.llvm.org_D10521&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=mQ4LZ2PUj9hpadE3cDHZnIdEwhEBrbAstXeMaFoB9tg&m=p5pLDIaD4Qe0hCKMJFfSNf18frBcL7O2Qz7c21gizxY&s=YR_owslIg3g6XkhKh2zvuzcGJXTDIUBiD4iMvoTnEuI&e=" rel="noreferrer" target="_blank">http://reviews.llvm.org/D10521</a>.<br>
><br>
><br>
> REPOSITORY<br>
> rL LLVM<br>
><br>
> <a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__reviews.llvm.org_D10525&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=mQ4LZ2PUj9hpadE3cDHZnIdEwhEBrbAstXeMaFoB9tg&m=p5pLDIaD4Qe0hCKMJFfSNf18frBcL7O2Qz7c21gizxY&s=SVTiYXxxJksg-FV0OW2MY7sdnKOEjUS0dXovQlYz5As&e=" rel="noreferrer" target="_blank">http://reviews.llvm.org/D10525</a><br>
><br>
> Files:<br>
> lib/CodeGen/MIRParser/MILexer.cpp<br>
> lib/CodeGen/MIRParser/MILexer.h<br>
> lib/CodeGen/MIRParser/MIParser.cpp<br>
> lib/CodeGen/MIRPrinter.cpp<br>
> test/CodeGen/MIR/X86/expected-machine-operand.mir<br>
> test/CodeGen/MIR/X86/missing-comma.mir<br>
> test/CodeGen/MIR/X86/named-registers.mir<br>
> test/CodeGen/MIR/X86/unknown-register.mir<br>
><br>
> EMAIL PREFERENCES<br>
> <a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__reviews.llvm.org_settings_panel_emailpreferences_&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=mQ4LZ2PUj9hpadE3cDHZnIdEwhEBrbAstXeMaFoB9tg&m=p5pLDIaD4Qe0hCKMJFfSNf18frBcL7O2Qz7c21gizxY&s=vgNUCWlSBCy3uASe9YIgPuKO3Pid0E24RUPCu4h_QUU&e=" rel="noreferrer" target="_blank">http://reviews.llvm.org/settings/panel/emailpreferences/</a><br>
</div></div>> <D10525.27967.patch><br>
<br>
I have a few comments below. Also, I was hoping Ahmed (or someone else<br>
with MIR-level familiarity) could comment on the case-insensitive<br>
register names. Are we safer here than with instruction opcodes?<br>
<br>
> Index: lib/CodeGen/MIRParser/MILexer.cpp<br>
> ===================================================================<br>
> --- lib/CodeGen/MIRParser/MILexer.cpp<br>
> +++ lib/CodeGen/MIRParser/MILexer.cpp<br>
> @@ -67,6 +67,33 @@<br>
> return C;<br>
> }<br>
><br>
> +static Cursor lexPercent(Cursor C, MIToken &Token) {<br>
> + auto Range = C;<br>
> + C.advance(); // Skip '%'<br>
> + while (isIdentifierChar(C.peek()))<br>
> + C.advance();<br>
> + Token = MIToken(MIToken::NamedRegister, Range.upto(C));<br>
> + return C;<br>
> +}<br>
> +<br>
> +static MIToken::TokenKind symbolToken(char C) {<br>
> + switch (C) {<br>
> + case ',':<br>
> + return MIToken::comma;<br>
> + case '=':<br>
> + return MIToken::equal;<br>
> + default:<br>
> + return MIToken::Error;<br>
> + }<br>
> +}<br>
> +<br>
> +static Cursor lexSymbol(Cursor C, MIToken::TokenKind Kind, MIToken &Token) {<br>
> + auto Range = C;<br>
> + C.advance();<br>
> + Token = MIToken(Kind, Range.upto(C));<br>
> + return C;<br>
> +}<br>
> +<br>
> MILexer::MILexer(StringRef Source) : CurrentSource(Source) {}<br>
><br>
> StringRef MILexer::lexToken(MIToken &Token) {<br>
> @@ -79,6 +106,11 @@<br>
> auto Char = C.peek();<br>
> if (isalpha(Char) || Char == '_')<br>
> return lexIdentifier(C, Token).remaining();<br>
> + if (Char == '%')<br>
> + return lexPercent(C, Token).remaining();<br>
> + MIToken::TokenKind Kind = symbolToken(Char);<br>
> + if (Kind != MIToken::Error)<br>
> + return lexSymbol(C, Kind, Token).remaining();<br>
> error(C.location(), Twine("unexpected character '") + Twine(Char) + "'");<br>
> return C.remaining();<br>
> }<br>
> Index: lib/CodeGen/MIRParser/MILexer.h<br>
> ===================================================================<br>
> --- lib/CodeGen/MIRParser/MILexer.h<br>
> +++ lib/CodeGen/MIRParser/MILexer.h<br>
> @@ -26,8 +26,13 @@<br>
> Eof,<br>
> Error,<br>
><br>
> + // Tokens with no info.<br>
> + comma,<br>
> + equal,<br>
> +<br>
> // Identifier tokens<br>
> - Identifier<br>
> + Identifier,<br>
> + NamedRegister<br>
> };<br>
><br>
> private:<br>
> @@ -41,6 +46,8 @@<br>
><br>
> bool isError() const { return Kind == Error; }<br>
><br>
> + bool isRegister() const { return Kind == NamedRegister; }<br>
> +<br>
> bool is(TokenKind K) const { return Kind == K; }<br>
><br>
> bool isNot(TokenKind K) const { return Kind != K; }<br>
> Index: lib/CodeGen/MIRParser/MIParser.cpp<br>
> ===================================================================<br>
> --- lib/CodeGen/MIRParser/MIParser.cpp<br>
> +++ lib/CodeGen/MIRParser/MIParser.cpp<br>
> @@ -34,6 +34,8 @@<br>
> MIToken Token;<br>
> /// Maps from instruction names to op codes.<br>
> StringMap<unsigned> Names2InstrOpCodes;<br>
> + /// Maps from register names to registers.<br>
> + StringMap<unsigned> Names2Regs;<br>
><br>
> public:<br>
> MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,<br>
> @@ -50,14 +52,26 @@<br>
><br>
> MachineInstr *parse();<br>
><br>
> + bool parseRegister(unsigned &Reg);<br>
> +<br>
> + bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);<br>
> +<br>
> + bool parseMachineOperand(MachineOperand &Dest);<br>
<br>
If you're not adding comments (I don't think you need them, since the<br>
names and parameters are fairly clear), I think you should kill the<br>
newlines between these functions.<br>
<br>
> +<br>
> private:<br>
> void initNames2InstrOpCodes();<br>
><br>
> /// Try to convert an instruction name to an opcode. Return true if the<br>
> /// instruction name is invalid.<br>
> bool parseInstrName(StringRef InstrName, unsigned &OpCode);<br>
><br>
> bool parseInstruction(unsigned &OpCode);<br>
> +<br>
> + void initNames2Regs();<br>
> +<br>
> + /// Try to convert a register name to a register number. Return true if the<br>
> + /// register name is invalid.<br>
> + bool getRegisterByName(StringRef RegName, unsigned &Reg);<br>
> };<br>
><br>
> } // end anonymous namespace<br>
> @@ -87,13 +101,61 @@<br>
> MachineInstr *MIParser::parse() {<br>
> lex();<br>
><br>
> + // Parse any register operands before '='<br>
> + // TODO: Allow parsing of multiple operands before '='<br>
> + MachineOperand MO = MachineOperand::CreateImm(0);<br>
> + SmallVector<MachineOperand, 8> Operands;<br>
> + if (Token.isRegister()) {<br>
> + if (parseRegisterOperand(MO, /*IsDef=*/true))<br>
> + return nullptr;<br>
> + Operands.push_back(MO);<br>
> + if (Token.isNot(MIToken::equal)) {<br>
> + error("expected '='");<br>
> + return nullptr;<br>
> + }<br>
> + lex();<br>
> + }<br>
> +<br>
> unsigned OpCode;<br>
> if (Token.isError() || parseInstruction(OpCode))<br>
> return nullptr;<br>
><br>
> - // TODO: Parse the rest of instruction - machine operands, etc.<br>
> + // TODO: Parse the instruction flags and memory operands.<br>
> +<br>
> + // Parse the remaining machine operands.<br>
> + while (Token.isNot(MIToken::Eof)) {<br>
> + if (parseMachineOperand(MO))<br>
> + return nullptr;<br>
> + Operands.push_back(MO);<br>
> + if (Token.is(MIToken::Eof))<br>
> + break;<br>
> + if (Token.isNot(MIToken::comma)) {<br>
> + error("expected ',' before the next machine operand");<br>
> + return nullptr;<br>
> + }<br>
> + lex();<br>
> + }<br>
> +<br>
> const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);<br>
> - auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());<br>
> +<br>
> + // Verify machine operands<br>
<br>
Add a `.` at the end of the sentence.<br>
<br>
> + if (!MCID.isVariadic()) {<br>
> + for (size_t I = 0, E = Operands.size(); I < E; ++I) {<br>
> + if (I < MCID.getNumOperands())<br>
> + continue;<br>
> + // Mark this register as implicit to prevent an assertion when it's added<br>
> + // to an instruction. This is a temporary workaround until the implicit<br>
> + // register flag can be parsed.<br>
> + Operands[I].setImplicit();<br>
> + }<br>
> + }<br>
> +<br>
> + // TODO: Determine the implicit behaviour when implicit register flags are<br>
> + // parsed.<br>
> + auto *MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);<br>
> + for (const auto &Operand : Operands) {<br>
> + MI->addOperand(MF, Operand);<br>
> + }<br>
<br>
Probably don't need the braces?<br>
<br>
> return MI;<br>
> }<br>
><br>
> @@ -103,6 +165,46 @@<br>
> StringRef InstrName = Token.stringValue();<br>
> if (parseInstrName(InstrName, OpCode))<br>
> return error(Twine("unknown machine instruction name '") + InstrName + "'");<br>
> + lex();<br>
> + return false;<br>
> +}<br>
> +<br>
> +bool MIParser::parseRegister(unsigned &Reg) {<br>
> + switch (Token.kind()) {<br>
> + case MIToken::NamedRegister: {<br>
> + StringRef Name = Token.stringValue().drop_front(1); // Drop the '%'<br>
> + if (getRegisterByName(Name, Reg))<br>
> + return error(Twine("unknown register name '") + Name + "'");<br>
> + break;<br>
> + }<br>
> + // TODO: Parse other register kinds.<br>
> + default:<br>
> + llvm_unreachable("The current token should be a register");<br>
> + }<br>
> + return false;<br>
> +}<br>
> +<br>
> +bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {<br>
> + unsigned Reg;<br>
> + // TODO: Parse register flags<br>
<br>
Add a `.`.<br>
<br>
> + if (parseRegister(Reg))<br>
> + return true;<br>
> + lex();<br>
> + // TODO: Parse subregister.<br>
> + Dest = MachineOperand::CreateReg(Reg, IsDef);<br>
> + return false;<br>
> +}<br>
> +<br>
> +bool MIParser::parseMachineOperand(MachineOperand &Dest) {<br>
> + switch (Token.kind()) {<br>
> + case MIToken::NamedRegister:<br>
> + return parseRegisterOperand(Dest);<br>
> + case MIToken::Error:<br>
> + return true;<br>
> + default:<br>
> + // TODO: parse the other machine operands.<br>
> + return error("expected a machine operand");<br>
> + }<br>
> return false;<br>
> }<br>
><br>
> @@ -125,6 +227,24 @@<br>
> return false;<br>
> }<br>
><br>
> +void MIParser::initNames2Regs() {<br>
> + if (!Names2Regs.empty())<br>
> + return;<br>
> + const auto *TRI = MF.getSubtarget().getRegisterInfo();<br>
> + assert(TRI && "Expected target register info");<br>
> + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I)<br>
> + Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I));<br>
<br>
Is this safe for registers? (Ahmed?)<br>
<br>
Anyway, we should at least check:<br>
<br>
for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {<br>
bool WasInserted =<br>
Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)).second;<br>
(void)WasInserted;<br>
assert(WasInserted && "Expected registers to be unique case-insensitively");<br>
}<br>
<br>
Or maybe there's somewhere we can put this check to catch it at tablegen<br>
time.<br>
<br>
> +}<br>
> +<br>
> +bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {<br>
> + initNames2Regs();<br>
> + auto RegInfo = Names2Regs.find(RegName);<br>
> + if (RegInfo == Names2Regs.end())<br>
> + return true;<br>
> + Reg = RegInfo->getValue();<br>
> + return false;<br>
> +}<br>
> +<br>
> MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction &MF,<br>
> StringRef Src, SMDiagnostic &Error) {<br>
> return MIParser(SM, MF, Error, Src).parse();<br>
> Index: lib/CodeGen/MIRPrinter.cpp<br>
> ===================================================================<br>
> --- lib/CodeGen/MIRPrinter.cpp<br>
> +++ lib/CodeGen/MIRPrinter.cpp<br>
> @@ -50,6 +50,8 @@<br>
> MIPrinter(raw_ostream &OS) : OS(OS) {}<br>
><br>
> void print(const MachineInstr &MI);<br>
> +<br>
<br>
I don't think this newline adds any value.<br>
<br>
> + void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);<br>
> };<br>
><br>
> } // end anonymous namespace<br>
> @@ -110,10 +112,57 @@<br>
><br>
> void MIPrinter::print(const MachineInstr &MI) {<br>
> const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();<br>
> + const auto *TRI = SubTarget.getRegisterInfo();<br>
> + assert(TRI && "Expected target register info");<br>
> const auto *TII = SubTarget.getInstrInfo();<br>
><br>
> + unsigned I = 0, E = MI.getNumOperands();<br>
> + for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&<br>
> + !MI.getOperand(I).isImplicit();<br>
> + ++I) {<br>
> + if (I)<br>
> + OS << ", ";<br>
> + print(MI.getOperand(I), TRI);<br>
> + }<br>
> +<br>
> + if (I)<br>
> + OS << " = ";<br>
> OS << StringRef(TII->getName(MI.getOpcode())).lower();<br>
> - // TODO: Print the instruction flags, machine operands, machine mem operands.<br>
> + // TODO: Print the instruction flags, machine mem operands.<br>
> + if (I < E)<br>
> + OS << ' ';<br>
> +<br>
> + bool NeedComma = false;<br>
> + for (; I < E; ++I) {<br>
> + if (NeedComma)<br>
> + OS << ", ";<br>
> + print(MI.getOperand(I), TRI);<br>
> + NeedComma = true;<br>
> + }<br>
> +}<br>
> +<br>
> +static void printReg(unsigned Reg, raw_ostream &OS,<br>
> + const TargetRegisterInfo *TRI) {<br>
> + // TODO: Print Stack Slots<br>
> + // TODO: Print no register<br>
> + // TODO: Print virtual registers<br>
> + if (Reg < TRI->getNumRegs())<br>
> + OS << '%' << StringRef(TRI->getName(Reg)).lower();<br>
> + else<br>
> + llvm_unreachable("Can't print this kind of register yet");<br>
> +}<br>
> +<br>
> +void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {<br>
> + switch (Op.getType()) {<br>
> + case MachineOperand::MO_Register:<br>
> + // TODO: Print register flags.<br>
> + printReg(Op.getReg(), OS, TRI);<br>
> + // TODO: Print sub register.<br>
> + break;<br>
> + default:<br>
> + // TODO: Print the other machine operands.<br>
> + llvm_unreachable("Can't print this machine operand at the moment");<br>
> + }<br>
> }<br>
><br>
> void llvm::printMIR(raw_ostream &OS, const Module &M) {<br>
> Index: test/CodeGen/MIR/X86/expected-machine-operand.mir<br>
> ===================================================================<br>
> --- /dev/null<br>
> +++ test/CodeGen/MIR/X86/expected-machine-operand.mir<br>
> @@ -0,0 +1,20 @@<br>
> +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s<br>
> +<br>
> +--- |<br>
> +<br>
> + define i32 @foo() {<br>
> + entry:<br>
> + ret i32 0<br>
> + }<br>
> +<br>
> +...<br>
> +---<br>
> +name: foo<br>
> +body:<br>
> + - name: entry<br>
> + instructions:<br>
> + # CHECK: 1:16: expected a machine operand<br>
<br>
Just noticed this error doesn't give the correct line number, just a<br>
local line number for the instruction. Why not? I guess I missed this<br>
in the earlier instruction patches? (As long as it's fixable, I don't<br>
think it's urgent to fix immediately, but it's kind of broken.)<br></blockquote><div><br></div><div>There's a TODO for this in the previous lexing patch, the MI string error locations aren't translated into the full file locations yet. There will be a separate patch that will fix this.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
> + - '%eax = xor32rr ='<br>
> + - 'retq %eax'<br>
> +...<br>
> +<br>
> Index: test/CodeGen/MIR/X86/missing-comma.mir<br>
> ===================================================================<br>
> --- /dev/null<br>
> +++ test/CodeGen/MIR/X86/missing-comma.mir<br>
> @@ -0,0 +1,20 @@<br>
> +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s<br>
> +<br>
> +--- |<br>
> +<br>
> + define i32 @foo() {<br>
> + entry:<br>
> + ret i32 0<br>
> + }<br>
> +<br>
> +...<br>
> +---<br>
> +name: foo<br>
> +body:<br>
> + - name: entry<br>
> + instructions:<br>
> + # CHECK: 1:21: expected ',' before the next machine operand<br>
> + - '%eax = xor32rr %eax %eflags'<br>
> + - 'retq %eax'<br>
> +...<br>
> +<br>
> Index: test/CodeGen/MIR/X86/named-registers.mir<br>
> ===================================================================<br>
> --- /dev/null<br>
> +++ test/CodeGen/MIR/X86/named-registers.mir<br>
> @@ -0,0 +1,22 @@<br>
> +# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s<br>
> +# This test ensures that the MIR parser parses X86 registers coorectly.<br>
> +<br>
> +--- |<br>
> +<br>
> + define i32 @foo() {<br>
> + entry:<br>
> + ret i32 0<br>
> + }<br>
> +<br>
> +...<br>
> +---<br>
> +# CHECK: name: foo<br>
> +name: foo<br>
> +body:<br>
> + - name: entry<br>
> + instructions:<br>
> + # CHECK: - '%eax = mov32r0<br>
> + # CHECK-NEXT: - 'retq %eax<br>
> + - '%eax = mov32r0'<br>
> + - 'retq %eax'<br>
> +...<br>
> Index: test/CodeGen/MIR/X86/unknown-register.mir<br>
> ===================================================================<br>
> --- /dev/null<br>
> +++ test/CodeGen/MIR/X86/unknown-register.mir<br>
> @@ -0,0 +1,21 @@<br>
> +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s<br>
> +# This test ensures that an error is reported when an unknown register is<br>
> +# encountered.<br>
> +<br>
> +--- |<br>
> +<br>
> + define i32 @foo() {<br>
> + entry:<br>
> + ret i32 0<br>
> + }<br>
> +<br>
> +...<br>
> +---<br>
> +name: foo<br>
> +body:<br>
> + - name: entry<br>
> + instructions:<br>
> + # CHECK: 1:1: unknown register name 'xax'<br>
> + - '%xax = mov32r0'<br>
> + - 'retq %xax'<br>
> +...<br>
><br>
<br>
</blockquote></div><br></div></div>