[llvm-commits] [llvm] r83424 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

Mon Oct 26 09:02:25 PDT 2009

Hi Kevin,

A few comments inline:

On Tue, Oct 6, 2009 at 3:26 PM, Kevin Enderby <enderby at apple.com> wrote:
> Author: enderby
> Date: Tue Oct  6 17:26:42 2009
> New Revision: 83424
>
> URL: http://llvm.org/viewvc/llvm-project?rev=83424&view=rev
> Log:
> Added bits of the ARM target assembler to llvm-mc to parse some load instruction
> operands.  Some parsing of arm memory operands for preindexing and postindexing
> forms including with register controled shifts.  This is a work in progress.
>
> Modified:
>    llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
>
> Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=83424&r1=83423&r2=83424&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Oct  6 17:26:42 2009
> @@ -23,6 +23,15 @@
>  namespace {
>  struct ARMOperand;
>
> +// The shift types for register controlled shifts in arm memory addressing
> +enum ShiftType {
> +  Lsl,
> +  Lsr,
> +  Asr,
> +  Ror,
> +  Rrx
> +};
> +
>  class ARMAsmParser : public TargetAsmParser {
>   MCAsmParser &Parser;
>
> @@ -35,8 +44,31 @@
>
>   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
>
> +  bool ParseRegister(ARMOperand &Op);
> +
> +  bool ParseMemory(ARMOperand &Op);
> +
> +  bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount);
> +
> +  bool ParseOperand(ARMOperand &Op);
> +
>   bool ParseDirectiveWord(unsigned Size, SMLoc L);
>
> +  // TODO - For now hacked versions of the next two are in here in this file to
> +  // allow some parser testing until the table gen versions are implemented.
> +
> +  /// @name Auto-generated Match Functions
> +  /// {
> +  bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
> +                        MCInst &Inst);
> +
> +  /// MatchRegisterName - Match the given string to a register name, or 0 if
> +  /// there is no match.
> +  unsigned MatchRegisterName(const StringRef &Name);
> +
> +  /// }
> +
> +
>  public:
>   ARMAsmParser(const Target &T, MCAsmParser &_Parser)
>     : TargetAsmParser(T), Parser(_Parser) {}
> @@ -48,9 +80,380 @@
>
>  } // end anonymous namespace
>
> +namespace {
> +
> +/// ARMOperand - Instances of this class represent a parsed ARM machine
> +/// instruction.
> +struct ARMOperand {
> +  enum {
> +    Token,
> +    Register,
> +    Memory
> +  } Kind;
> +
> +
> +  union {
> +    struct {
> +      const char *Data;
> +      unsigned Length;
> +    } Tok;
> +
> +    struct {
> +      unsigned RegNum;
> +    } Reg;
> +
> +    // This is for all forms of ARM address expressions
> +    struct {
> +      unsigned BaseRegNum;
> +      bool OffsetIsReg;
> +      const MCExpr *Offset; // used when OffsetIsReg is false
> +      unsigned OffsetRegNum; // used when OffsetIsReg is true
> +      bool OffsetRegShifted; // only used when OffsetIsReg is true
> +      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
> +      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
> +      bool Preindexed;
> +      bool Postindexed;
> +      bool Negative; // only used when OffsetIsReg is true
> +      bool Writeback;
> +    } Mem;

It probably makes sense to reorder these fields to make the struct
smaller (bool -> 'unsigned foo : 1', and make MCExpr* fields
adjacent).

> +// Try to parse a register name.  The token must be an Identifier when called,
> +// and if it is a register name a Reg operand is created, the token is eaten
> +// and false is returned.  Else true is returned and no token is eaten.
> +// TODO this is likely to change to allow different register types and or to
> +// parse for a specific register type.

Please use '///' instead of '//' so the comment gets picked up by
doxygen. This applies in a number of places in this file.

> +bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
> +  const AsmToken &Tok = getLexer().getTok();
> +  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
> +
> +  // FIXME: Validate register for the current architecture; we have to do
> +  // validation later, so maybe there is no need for this here.
> +  unsigned RegNum;
> +
> +  RegNum = MatchRegisterName(Tok.getString());
> +  if (RegNum == 0)
> +    return true;
> +
> +  Op = ARMOperand::CreateReg(RegNum);
> +  getLexer().Lex(); // Eat identifier token.
> +
> +  return false;
> +}

Since this is a "try parse" routine, and doesn't emit errors or
consume tokens on failure, it should be called MaybeParseRegister.

> +
> +// Try to parse an arm memory expression.  It must start with a '[' token.
> +// TODO Only preindexing and postindexing addressing are started, unindexed
> +// with option, etc are still to do.
> +bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
> +  const AsmToken &LBracTok = getLexer().getTok();
> +  assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket");
> +  getLexer().Lex(); // Eat left bracket token.
> +
> +  const AsmToken &BaseRegTok = getLexer().getTok();
> +  if (BaseRegTok.isNot(AsmToken::Identifier))
> +    return Error(BaseRegTok.getLoc(), "register expected");
> +  unsigned BaseRegNum = MatchRegisterName(BaseRegTok.getString());
> +  if (BaseRegNum == 0)
> +    return Error(BaseRegTok.getLoc(), "register expected");

Shouldn't this just call ParseRegister?

> +  getLexer().Lex(); // Eat identifier token.
> +
> +  bool Preindexed = false;
> +  bool Postindexed = false;
> +  bool OffsetIsReg = false;
> +  bool Negative = false;
> +  bool Writeback = false;
> +
> +  // First look for preindexed address forms:
> +  //  [Rn, +/-Rm]
> +  //  [Rn, #offset]
> +  //  [Rn, +/-Rm, shift]
> +  // that is after the "[Rn" we now have see if the next token is a comma.
> +  const AsmToken &Tok = getLexer().getTok();
> +  if (Tok.is(AsmToken::Comma)) {
> +    Preindexed = true;
> +    getLexer().Lex(); // Eat comma token.
> +
> +    const AsmToken &NextTok = getLexer().getTok();
> +    if (NextTok.is(AsmToken::Plus))
> +      getLexer().Lex(); // Eat plus token.
> +    else if (NextTok.is(AsmToken::Minus)) {
> +      Negative = true;
> +      getLexer().Lex(); // Eat minus token
> +    }
> +
> +    // See if there is a register following the "[Rn," we have so far.
> +    const AsmToken &OffsetRegTok = getLexer().getTok();
> +    unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());

Again, I think this should just call ParseRegister.

Is a plus/minus not required here? This code will accept [Rn, Rm], is
that intended? If so the comment above listing the forms should be
updated.

> +    bool OffsetRegShifted = false;
> +    enum ShiftType ShiftType;
> +    const MCExpr *ShiftAmount;
> +    const MCExpr *Offset;
> +    if (OffsetRegNum != 0) {
> +      OffsetIsReg = true;
> +      getLexer().Lex(); // Eat identifier token for the offset register.
> +      // Look for a comma then a shift
> +      const AsmToken &Tok = getLexer().getTok();
> +      if (Tok.is(AsmToken::Comma)) {
> +        getLexer().Lex(); // Eat comma token.
> +
> +        const AsmToken &Tok = getLexer().getTok();
> +        if (ParseShift(&ShiftType, ShiftAmount))
> +          return Error(Tok.getLoc(), "shift expected");
> +        OffsetRegShifted = true;
> +      }
> +    }
> +    else { // "[Rn," we have so far was not followed by "Rm"
> +      // Look for #offset following the "[Rn,"
> +      const AsmToken &HashTok = getLexer().getTok();
> +      if (HashTok.isNot(AsmToken::Hash))
> +        return Error(HashTok.getLoc(), "'#' expected");
> +      getLexer().Lex(); // Eat hash token.
> +
> +      if (getParser().ParseExpression(Offset))
> +       return true;
> +    }
> +    const AsmToken &RBracTok = getLexer().getTok();
> +    if (RBracTok.isNot(AsmToken::RBrac))
> +      return Error(RBracTok.getLoc(), "']' expected");
> +    getLexer().Lex(); // Eat right bracket token.
> +
> +    const AsmToken &ExclaimTok = getLexer().getTok();
> +    if (ExclaimTok.is(AsmToken::Exclaim)) {
> +      Writeback = true;
> +      getLexer().Lex(); // Eat exclaim token
> +    }

Can you please include an example of this form in the list of forms
this routine is parsing?

> +    Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
> +                               OffsetRegShifted, ShiftType, ShiftAmount,
> +                               Preindexed, Postindexed, Negative, Writeback);
> +    return false;
> +  }
> +  // The "[Rn" we have so far was not followed by a comma.
> +  else if (Tok.is(AsmToken::RBrac)) {
> +    // This is a post indexing addressing forms:
> +    //  [Rn], #offset
> +    //  [Rn], +/-Rm
> +    //  [Rn], +/-Rm, shift
> +    // that is a ']' follows after the "[Rn".
> +    Postindexed = true;
> +    Writeback = true;
> +    getLexer().Lex(); // Eat right bracket token.
> +
> +    const AsmToken &CommaTok = getLexer().getTok();
> +    if (CommaTok.isNot(AsmToken::Comma))
> +      return Error(CommaTok.getLoc(), "',' expected");
> +    getLexer().Lex(); // Eat comma token.
> +
> +    const AsmToken &NextTok = getLexer().getTok();
> +    if (NextTok.is(AsmToken::Plus))
> +      getLexer().Lex(); // Eat plus token.
> +    else if (NextTok.is(AsmToken::Minus)) {
> +      Negative = true;
> +      getLexer().Lex(); // Eat minus token
> +    }
> +
> +    // See if there is a register following the "[Rn]," we have so far.
> +    const AsmToken &OffsetRegTok = getLexer().getTok();
> +    unsigned OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
> +    bool OffsetRegShifted = false;
> +    enum ShiftType ShiftType;
> +    const MCExpr *ShiftAmount;
> +    const MCExpr *Offset;
> +    if (OffsetRegNum != 0) {
> +      OffsetIsReg = true;
> +      getLexer().Lex(); // Eat identifier token for the offset register.
> +      // Look for a comma then a shift
> +      const AsmToken &Tok = getLexer().getTok();
> +      if (Tok.is(AsmToken::Comma)) {
> +        getLexer().Lex(); // Eat comma token.
> +
> +        const AsmToken &Tok = getLexer().getTok();
> +        if (ParseShift(&ShiftType, ShiftAmount))
> +          return Error(Tok.getLoc(), "shift expected");
> +        OffsetRegShifted = true;
> +      }
> +    }
> +    else { // "[Rn]," we have so far was not followed by "Rm"
> +      // Look for #offset following the "[Rn],"
> +      const AsmToken &HashTok = getLexer().getTok();
> +      if (HashTok.isNot(AsmToken::Hash))
> +        return Error(HashTok.getLoc(), "'#' expected");
> +      getLexer().Lex(); // Eat hash token.
> +
> +      if (getParser().ParseExpression(Offset))
> +       return true;
> +    }
> +    Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
> +                               OffsetRegShifted, ShiftType, ShiftAmount,
> +                               Preindexed, Postindexed, Negative, Writeback);
> +    return false;
> +  }
> +
> +  return true;
> +}

I think it might be cleaner to factor this routine into a part that parses:
  ',' #offset
  ',' +/-Rm
  ',' +/-Rm, shift
and then reuse that for both the pre and post indexed forms.

> +/// ParseShift as one of these two:
> +///   ( lsl | lsr | asr | ror ) , # shift_amount
> +///   rrx
> +/// and returns true if it parses a shift otherwise it returns false.
> +bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) {

This should take a reference to ShiftType instead of taking a pointer
to it (convention).

> +  const AsmToken &Tok = getLexer().getTok();
> +  if (Tok.isNot(AsmToken::Identifier))
> +    return true;
> +  const StringRef &ShiftName = Tok.getString();
> +  if (ShiftName == "lsl" || ShiftName == "LSL")
> +    *St = Lsl;
> +  else if (ShiftName == "lsr" || ShiftName == "LSR")
> +    *St = Lsr;
> +  else if (ShiftName == "asr" || ShiftName == "ASR")
> +    *St = Asr;
> +  else if (ShiftName == "ror" || ShiftName == "ROR")
> +    *St = Ror;
> +  else if (ShiftName == "rrx" || ShiftName == "RRX")
> +    *St = Rrx;
> +  else
> +    return true;
> +  getLexer().Lex(); // Eat shift type token.
> +
> +  // For all but a Rotate right there must be a '#' and a shift amount
> +  if (*St != Rrx) {

Chris will be happier if you write this as:
--
  // Rrx stands alone.
  if (*St == Rrx)
    return false;

  // Otherwise, there must be a '#' and a shift amount.
  ...
--
to reduce the nesting. :)

> +    // Look for # following the shift type
> +    const AsmToken &HashTok = getLexer().getTok();
> +    if (HashTok.isNot(AsmToken::Hash))
> +      return Error(HashTok.getLoc(), "'#' expected");
> +    getLexer().Lex(); // Eat hash token.
> +
> +    if (getParser().ParseExpression(ShiftAmount))
> +      return true;
> +  }
> +
> +  return false;
> +}
> +
> +// A hack to allow some testing
> +unsigned ARMAsmParser::MatchRegisterName(const StringRef &Name) {
> +  if (Name == "r1")
> +    return 1;
> +  else if (Name == "r2")
> +    return 2;
> +  else if (Name == "r3")
> +    return 3;
> +  return 0;
> +}
> +
> +// A hack to allow some testing
> +bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
> +                                    MCInst &Inst) {
> +  struct ARMOperand Op0 = Operands[0];
> +  assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
> +  const StringRef &Mnemonic = Op0.getToken();
> +  if (Mnemonic == "add" ||
> +      Mnemonic == "ldr")
> +    return false;
> +
> +  return true;
> +}
> +
> +// TODO - this is a work in progress
> +bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
> +  switch (getLexer().getKind()) {
> +  case AsmToken::Identifier:
> +    if (!ParseRegister(Op))
> +      return false;
> +    // TODO parse other operands that start with an identifier
> +    return true;
> +  case AsmToken::LBrac:
> +    if (!ParseMemory(Op))
> +      return false;
> +  default:
> +    return true;
> +  }
> +}
> +
>  bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
> +  SmallVector<ARMOperand, 7> Operands;
> +
> +  Operands.push_back(ARMOperand::CreateToken(Name));
> +
>   SMLoc Loc = getLexer().getTok().getLoc();
> -  Error(Loc, "ARMAsmParser::ParseInstruction currently unimplemented");
> +  if (getLexer().isNot(AsmToken::EndOfStatement)) {
> +
> +    // Read the first operand.
> +    Operands.push_back(ARMOperand());
> +    if (ParseOperand(Operands.back()))
> +      return true;
> +
> +    while (getLexer().is(AsmToken::Comma)) {
> +      getLexer().Lex();  // Eat the comma.
> +
> +      // Parse and remember the operand.
> +      Operands.push_back(ARMOperand());
> +      if (ParseOperand(Operands.back()))
> +        return true;
> +    }
> +  }
> +  if (!MatchInstruction(Operands, Inst))
> +    return false;
> +
> +  Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
>   return true;
>  }

Thanks!
 - Daniel