[llvm-commits] [llvm] r83424 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Daniel Dunbar
daniel at zuster.org
Mon Oct 26 09:02:25 PDT 2009
Hi Kevin,
A few comments inline:
On Tue, Oct 6, 2009 at 3:26 PM, Kevin Enderby <enderby at apple.com> wrote:
> Author: enderby
> Date: Tue Oct 6 17:26:42 2009
> New Revision: 83424
>
> URL: http://llvm.org/viewvc/llvm-project?rev=83424&view=rev
> Log:
> Added bits of the ARM target assembler to llvm-mc to parse some load instruction
> operands. Some parsing of arm memory operands for preindexing and postindexing
> forms including with register controled shifts. This is a work in progress.
>
> Modified:
> llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
>
> Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=83424&r1=83423&r2=83424&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Oct 6 17:26:42 2009
> @@ -23,6 +23,15 @@
> namespace {
> struct ARMOperand;
>
> +// The shift types for register controlled shifts in arm memory addressing
> +enum ShiftType {
> + Lsl,
> + Lsr,
> + Asr,
> + Ror,
> + Rrx
> +};
> +
> class ARMAsmParser : public TargetAsmParser {
> MCAsmParser &Parser;
>
> @@ -35,8 +44,31 @@
>
> bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
>
> + bool ParseRegister(ARMOperand &Op);
> +
> + bool ParseMemory(ARMOperand &Op);
> +
> + bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount);
> +
> + bool ParseOperand(ARMOperand &Op);
> +
> bool ParseDirectiveWord(unsigned Size, SMLoc L);
>
> + // TODO - For now hacked versions of the next two are in here in this file to
> + // allow some parser testing until the table gen versions are implemented.
> +
> + /// @name Auto-generated Match Functions
> + /// {
> + bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
> + MCInst &Inst);
> +
> + /// MatchRegisterName - Match the given string to a register name, or 0 if
> + /// there is no match.
> + unsigned MatchRegisterName(const StringRef &Name);
> +
> + /// }
> +
> +
> public:
> ARMAsmParser(const Target &T, MCAsmParser &_Parser)
> : TargetAsmParser(T), Parser(_Parser) {}
> @@ -48,9 +80,380 @@
>
> } // end anonymous namespace
>
> +namespace {
> +
> +/// ARMOperand - Instances of this class represent a parsed ARM machine
> +/// instruction.
> +struct ARMOperand {
> + enum {
> + Token,
> + Register,
> + Memory
> + } Kind;
> +
> +
> + union {
> + struct {
> + const char *Data;
> + unsigned Length;
> + } Tok;
> +
> + struct {
> + unsigned RegNum;
> + } Reg;
> +
> + // This is for all forms of ARM address expressions
> + struct {
> + unsigned BaseRegNum;
> + bool OffsetIsReg;
> + const MCExpr *Offset; // used when OffsetIsReg is false
> + unsigned OffsetRegNum; // used when OffsetIsReg is true
> + bool OffsetRegShifted; // only used when OffsetIsReg is true
> + enum ShiftType ShiftType; // used when OffsetRegShifted is true
> + const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
> + bool Preindexed;
> + bool Postindexed;
> + bool Negative; // only used when OffsetIsReg is true
> + bool Writeback;
> + } Mem;
It probably makes sense to reorder these fields to make the struct
smaller (bool -> 'unsigned foo : 1', and make MCExpr* fields
adjacent).
> +// Try to parse a register name. The token must be an Identifier when called,
> +// and if it is a register name a Reg operand is created, the token is eaten
> +// and false is returned. Else true is returned and no token is eaten.
> +// TODO this is likely to change to allow different register types and or to
> +// parse for a specific register type.
Please use '///' instead of '//' so the comment gets picked up by
doxygen. This applies in a number of places in this file.
> +bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
> + const AsmToken &Tok = getLexer().getTok();
> + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
> +
> + // FIXME: Validate register for the current architecture; we have to do
> + // validation later, so maybe there is no need for this here.
> + unsigned RegNum;
> +
> + RegNum = MatchRegisterName(Tok.getString());
> + if (RegNum == 0)
> + return true;
> +
> + Op = ARMOperand::CreateReg(RegNum);
> + getLexer().Lex(); // Eat identifier token.
> +
> + return false;
> +}
Since this is a "try parse" routine, and doesn't emit errors or
consume tokens on failure, it should be called MaybeParseRegister.
> +
> +// Try to parse an arm memory expression. It must start with a '[' token.
> +// TODO Only preindexing and postindexing addressing are started, unindexed
> +// with option, etc are still to do.
> +bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
> + const AsmToken &LBracTok = getLexer().getTok();
> + assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket");
> + getLexer().Lex(); // Eat left bracket token.
> +
> + const AsmToken &BaseRegTok = getLexer().getTok();
> + if (BaseRegTok.isNot(AsmToken::Identifier))
> + return Error(BaseRegTok.getLoc(), "register expected");
> + unsigned BaseRegNum = MatchRegisterName(BaseRegTok.getString());
> + if (BaseRegNum == 0)
> + return Error(BaseRegTok.getLoc(), "register expected");
Shouldn't this just call ParseRegister?
> + getLexer().Lex(); // Eat identifier token.
> +
> + bool Preindexed = false;
> + bool Postindexed = false;
> + bool OffsetIsReg = false;
> + bool Negative = false;
> + bool Writeback = false;
> +
> + // First look for preindexed address forms:
> + // [Rn, +/-Rm]
> + // [Rn, #offset]
> + // [Rn, +/-Rm, shift]
> + // that is after the "[Rn" we now have see if the next token is a comma.
> + const AsmToken &Tok = getLexer().getTok();
> + if (Tok.is(AsmToken::Comma)) {
> + Preindexed = true;
> + getLexer().Lex(); // Eat comma token.
> +
> + const AsmToken &NextTok = getLexer().getTok();
> + if (NextTok.is(AsmToken::Plus))
> + getLexer().Lex(); // Eat plus token.
> + else if (NextTok.is(AsmToken::Minus)) {
> + Negative = true;
> + getLexer().Lex(); // Eat minus token
> + }
> +
> + // See if there is a register following the "[Rn," we have so far.
> + const AsmToken &OffsetRegTok = getLexer().getTok();
> + unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
Again, I think this should just call ParseRegister.
Is a plus/minus not required here? This code will accept [Rn, Rm], is
that intended? If so the comment above listing the forms should be
updated.
> + bool OffsetRegShifted = false;
> + enum ShiftType ShiftType;
> + const MCExpr *ShiftAmount;
> + const MCExpr *Offset;
> + if (OffsetRegNum != 0) {
> + OffsetIsReg = true;
> + getLexer().Lex(); // Eat identifier token for the offset register.
> + // Look for a comma then a shift
> + const AsmToken &Tok = getLexer().getTok();
> + if (Tok.is(AsmToken::Comma)) {
> + getLexer().Lex(); // Eat comma token.
> +
> + const AsmToken &Tok = getLexer().getTok();
> + if (ParseShift(&ShiftType, ShiftAmount))
> + return Error(Tok.getLoc(), "shift expected");
> + OffsetRegShifted = true;
> + }
> + }
> + else { // "[Rn," we have so far was not followed by "Rm"
> + // Look for #offset following the "[Rn,"
> + const AsmToken &HashTok = getLexer().getTok();
> + if (HashTok.isNot(AsmToken::Hash))
> + return Error(HashTok.getLoc(), "'#' expected");
> + getLexer().Lex(); // Eat hash token.
> +
> + if (getParser().ParseExpression(Offset))
> + return true;
> + }
> + const AsmToken &RBracTok = getLexer().getTok();
> + if (RBracTok.isNot(AsmToken::RBrac))
> + return Error(RBracTok.getLoc(), "']' expected");
> + getLexer().Lex(); // Eat right bracket token.
> +
> + const AsmToken &ExclaimTok = getLexer().getTok();
> + if (ExclaimTok.is(AsmToken::Exclaim)) {
> + Writeback = true;
> + getLexer().Lex(); // Eat exclaim token
> + }
Can you please include an example of this form in the list of forms
this routine is parsing?
> + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
> + OffsetRegShifted, ShiftType, ShiftAmount,
> + Preindexed, Postindexed, Negative, Writeback);
> + return false;
> + }
> + // The "[Rn" we have so far was not followed by a comma.
> + else if (Tok.is(AsmToken::RBrac)) {
> + // This is a post indexing addressing forms:
> + // [Rn], #offset
> + // [Rn], +/-Rm
> + // [Rn], +/-Rm, shift
> + // that is a ']' follows after the "[Rn".
> + Postindexed = true;
> + Writeback = true;
> + getLexer().Lex(); // Eat right bracket token.
> +
> + const AsmToken &CommaTok = getLexer().getTok();
> + if (CommaTok.isNot(AsmToken::Comma))
> + return Error(CommaTok.getLoc(), "',' expected");
> + getLexer().Lex(); // Eat comma token.
> +
> + const AsmToken &NextTok = getLexer().getTok();
> + if (NextTok.is(AsmToken::Plus))
> + getLexer().Lex(); // Eat plus token.
> + else if (NextTok.is(AsmToken::Minus)) {
> + Negative = true;
> + getLexer().Lex(); // Eat minus token
> + }
> +
> + // See if there is a register following the "[Rn]," we have so far.
> + const AsmToken &OffsetRegTok = getLexer().getTok();
> + unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
> + bool OffsetRegShifted = false;
> + enum ShiftType ShiftType;
> + const MCExpr *ShiftAmount;
> + const MCExpr *Offset;
> + if (OffsetRegNum != 0) {
> + OffsetIsReg = true;
> + getLexer().Lex(); // Eat identifier token for the offset register.
> + // Look for a comma then a shift
> + const AsmToken &Tok = getLexer().getTok();
> + if (Tok.is(AsmToken::Comma)) {
> + getLexer().Lex(); // Eat comma token.
> +
> + const AsmToken &Tok = getLexer().getTok();
> + if (ParseShift(&ShiftType, ShiftAmount))
> + return Error(Tok.getLoc(), "shift expected");
> + OffsetRegShifted = true;
> + }
> + }
> + else { // "[Rn]," we have so far was not followed by "Rm"
> + // Look for #offset following the "[Rn],"
> + const AsmToken &HashTok = getLexer().getTok();
> + if (HashTok.isNot(AsmToken::Hash))
> + return Error(HashTok.getLoc(), "'#' expected");
> + getLexer().Lex(); // Eat hash token.
> +
> + if (getParser().ParseExpression(Offset))
> + return true;
> + }
> + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
> + OffsetRegShifted, ShiftType, ShiftAmount,
> + Preindexed, Postindexed, Negative, Writeback);
> + return false;
> + }
> +
> + return true;
> +}
I think it might be cleaner to factor this routine into a part that parses:
',' #offset
',' +/-Rm
',' +/-Rm, shift
and then reuse that for both the pre and post indexed forms.
> +/// ParseShift as one of these two:
> +/// ( lsl | lsr | asr | ror ) , # shift_amount
> +/// rrx
> +/// and returns true if it parses a shift otherwise it returns false.
> +bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) {
This should take a reference to ShiftType instead of taking a pointer
to it (convention).
> + const AsmToken &Tok = getLexer().getTok();
> + if (Tok.isNot(AsmToken::Identifier))
> + return true;
> + const StringRef &ShiftName = Tok.getString();
> + if (ShiftName == "lsl" || ShiftName == "LSL")
> + *St = Lsl;
> + else if (ShiftName == "lsr" || ShiftName == "LSR")
> + *St = Lsr;
> + else if (ShiftName == "asr" || ShiftName == "ASR")
> + *St = Asr;
> + else if (ShiftName == "ror" || ShiftName == "ROR")
> + *St = Ror;
> + else if (ShiftName == "rrx" || ShiftName == "RRX")
> + *St = Rrx;
> + else
> + return true;
> + getLexer().Lex(); // Eat shift type token.
> +
> + // For all but a Rotate right there must be a '#' and a shift amount
> + if (*St != Rrx) {
Chris will be happier if you write this as:
--
// Rrx stands alone.
if (*St == Rrx)
return false;
// Otherwise, there must be a '#' and a shift amount.
...
--
to reduce the nesting. :)
> + // Look for # following the shift type
> + const AsmToken &HashTok = getLexer().getTok();
> + if (HashTok.isNot(AsmToken::Hash))
> + return Error(HashTok.getLoc(), "'#' expected");
> + getLexer().Lex(); // Eat hash token.
> +
> + if (getParser().ParseExpression(ShiftAmount))
> + return true;
> + }
> +
> + return false;
> +}
> +
> +// A hack to allow some testing
> +unsigned ARMAsmParser::MatchRegisterName(const StringRef &Name) {
> + if (Name == "r1")
> + return 1;
> + else if (Name == "r2")
> + return 2;
> + else if (Name == "r3")
> + return 3;
> + return 0;
> +}
> +
> +// A hack to allow some testing
> +bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
> + MCInst &Inst) {
> + struct ARMOperand Op0 = Operands[0];
> + assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
> + const StringRef &Mnemonic = Op0.getToken();
> + if (Mnemonic == "add" ||
> + Mnemonic == "ldr")
> + return false;
> +
> + return true;
> +}
> +
> +// TODO - this is a work in progress
> +bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
> + switch (getLexer().getKind()) {
> + case AsmToken::Identifier:
> + if (!ParseRegister(Op))
> + return false;
> + // TODO parse other operands that start with an identifier
> + return true;
> + case AsmToken::LBrac:
> + if (!ParseMemory(Op))
> + return false;
> + default:
> + return true;
> + }
> +}
> +
> bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
> + SmallVector<ARMOperand, 7> Operands;
> +
> + Operands.push_back(ARMOperand::CreateToken(Name));
> +
> SMLoc Loc = getLexer().getTok().getLoc();
> - Error(Loc, "ARMAsmParser::ParseInstruction currently unimplemented");
> + if (getLexer().isNot(AsmToken::EndOfStatement)) {
> +
> + // Read the first operand.
> + Operands.push_back(ARMOperand());
> + if (ParseOperand(Operands.back()))
> + return true;
> +
> + while (getLexer().is(AsmToken::Comma)) {
> + getLexer().Lex(); // Eat the comma.
> +
> + // Parse and remember the operand.
> + Operands.push_back(ARMOperand());
> + if (ParseOperand(Operands.back()))
> + return true;
> + }
> + }
> + if (!MatchInstruction(Operands, Inst))
> + return false;
> +
> + Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
> return true;
> }
Thanks!
- Daniel
More information about the llvm-commits
mailing list