[llvm] r244068 - MIR Parser: Simplify the handling of quoted tokens. NFC.

Sean Silva chisophugis at gmail.com
Wed Aug 5 16:35:12 PDT 2015


On Wed, Aug 5, 2015 at 10:35 AM, Alex Lorenz <arphaman at gmail.com> wrote:

> Author: arphaman
> Date: Wed Aug  5 12:35:55 2015
> New Revision: 244068
>
> URL: http://llvm.org/viewvc/llvm-project?rev=244068&view=rev
> Log:
> MIR Parser: Simplify the handling of quoted tokens. NFC.
>
> The machine instructions lexer should not expose the difference between
> quoted
> and unquoted tokens to the parser.
>
> Modified:
>     llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
>     llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
>     llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
>
> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=244068&r1=244067&r2=244068&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp Wed Aug  5 12:35:55 2015
> @@ -69,13 +69,14 @@ static bool isIdentifierChar(char C) {
>           C == '$';
>  }
>
> -void MIToken::unescapeQuotedStringValue(std::string &Str) const {
> -  assert(isStringValueQuoted() && "String value isn't quoted");
> -  StringRef Value = Range.drop_front(StringOffset);
> +/// Unescapes the given string value.
> +///
> +/// Expects the string value to be quoted.
> +static std::string unescapeQuotedString(StringRef Value) {
>    assert(Value.front() == '"' && Value.back() == '"');
>    Cursor C = Cursor(Value.substr(1, Value.size() - 2));
>
> -  Str.clear();
> +  std::string Str;
>    Str.reserve(C.remaining().size());
>    while (!C.isEOF()) {
>      char Char = C.peek();
> @@ -95,6 +96,7 @@ void MIToken::unescapeQuotedStringValue(
>      Str += Char;
>      C.advance();
>    }
> +  return Str;
>  }
>
>  /// Lex a string constant using the following regular expression:
> \"[^\"]*\"
> @@ -115,14 +117,16 @@ static Cursor lexStringConstant(
>  }
>
>  static Cursor lexName(
> -    Cursor C, MIToken &Token, MIToken::TokenKind Type,
> -    MIToken::TokenKind QuotedType, unsigned PrefixLength,
> +    Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned
> PrefixLength,
>      function_ref<void(StringRef::iterator Loc, const Twine &)>
> ErrorCallback) {
>    auto Range = C;
>    C.advance(PrefixLength);
>    if (C.peek() == '"') {
>      if (Cursor R = lexStringConstant(C, ErrorCallback)) {
> -      Token = MIToken(QuotedType, Range.upto(R), PrefixLength);
> +      StringRef String = Range.upto(R);
> +      Token = MIToken(Type, String,
> +
> unescapeQuotedString(String.drop_front(PrefixLength)),
> +                      PrefixLength);
>        return R;
>      }
>      Token = MIToken(MIToken::Error, Range.remaining());
> @@ -257,8 +261,7 @@ static Cursor maybeLexIRBlock(
>      return None;
>    if (isdigit(C.peek(Rule.size())))
>      return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
> -  return lexName(C, Token, MIToken::NamedIRBlock,
> MIToken::QuotedNamedIRBlock,
> -                 Rule.size(), ErrorCallback);
> +  return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(),
> ErrorCallback);
>  }
>
>  static Cursor maybeLexIRValue(
> @@ -267,8 +270,7 @@ static Cursor maybeLexIRValue(
>    const StringRef Rule = "%ir.";
>    if (!C.remaining().startswith(Rule))
>      return None;
> -  return lexName(C, Token, MIToken::NamedIRValue,
> MIToken::QuotedNamedIRValue,
> -                 Rule.size(), ErrorCallback);
> +  return lexName(C, Token, MIToken::NamedIRValue, Rule.size(),
> ErrorCallback);
>  }
>
>  static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
> @@ -302,8 +304,7 @@ static Cursor maybeLexGlobalValue(
>    if (C.peek() != '@')
>      return None;
>    if (!isdigit(C.peek(1)))
> -    return lexName(C, Token, MIToken::NamedGlobalValue,
> -                   MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1,
> +    return lexName(C, Token, MIToken::NamedGlobalValue,
> /*PrefixLength=*/1,
>                     ErrorCallback);
>    auto Range = C;
>    C.advance(1); // Skip the '@'
> @@ -320,9 +321,8 @@ static Cursor maybeLexExternalSymbol(
>      function_ref<void(StringRef::iterator Loc, const Twine &)>
> ErrorCallback) {
>    if (C.peek() != '$')
>      return None;
> -  return lexName(C, Token, MIToken::ExternalSymbol,
> -                 MIToken::QuotedExternalSymbol,
> -                 /*PrefixLength=*/1, ErrorCallback);
> +  return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
> +                 ErrorCallback);
>  }
>
>  static bool isValidHexFloatingPointPrefix(char C) {
>
> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=244068&r1=244067&r2=244068&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.h (original)
> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.h Wed Aug  5 12:35:55 2015
> @@ -70,10 +70,8 @@ struct MIToken {
>      StackObject,
>      FixedStackObject,
>      NamedGlobalValue,
> -    QuotedNamedGlobalValue,
>      GlobalValue,
>      ExternalSymbol,
> -    QuotedExternalSymbol,
>
>      // Other tokens
>      IntegerLiteral,
> @@ -82,25 +80,32 @@ struct MIToken {
>      ConstantPoolItem,
>      JumpTableIndex,
>      NamedIRBlock,
> -    QuotedNamedIRBlock,
>      IRBlock,
>      NamedIRValue,
> -    QuotedNamedIRValue,
>    };
>
>  private:
>    TokenKind Kind;
>    unsigned StringOffset;
> +  bool HasStringValue;
>    StringRef Range;
> +  std::string StringValue;
>

This seems really complicated to me (having to juggle StringOffset and a
bool about whether to use the offset). Can we just have a `StringRef
StrVal;` which either points into the source or to the owning buffer inside
the token? Then `stringValue()` is just `return StrVal;`.



>    APSInt IntVal;
>
>  public:
>    MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
> -      : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
> +      : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
> +        Range(Range) {}
> +
> +  MIToken(TokenKind Kind, StringRef Range, std::string StringValue,
> +          unsigned StringOffset = 0)
> +      : Kind(Kind), StringOffset(StringOffset), HasStringValue(true),
> +        Range(Range), StringValue(std::move(StringValue)) {}
>
>    MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
>            unsigned StringOffset = 0)
> -      : Kind(Kind), StringOffset(StringOffset), Range(Range),
> IntVal(IntVal) {}
> +      : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
> +        Range(Range), IntVal(IntVal) {}
>
>    TokenKind kind() const { return Kind; }
>
> @@ -124,30 +129,18 @@ public:
>
>    StringRef::iterator location() const { return Range.begin(); }
>
> -  bool isStringValueQuoted() const {
> -    return Kind == QuotedNamedGlobalValue || Kind == QuotedExternalSymbol
> ||
> -           Kind == QuotedNamedIRBlock || Kind == QuotedNamedIRValue;
> -  }
> -
>    /// Return the token's raw string value.
>    ///
>    /// If the string value is quoted, this method returns that quoted
> string as
>    /// it is, without unescaping the string value.
>    StringRef rawStringValue() const { return
> Range.drop_front(StringOffset); }
>

rawStringValue shouldn't be needed anymore, right?

-- Sean Silva


>
> -  /// Return token's string value.
> -  ///
> -  /// Expects the string value to be unquoted.
> +  /// Return the token's string value.
>    StringRef stringValue() const {
> -    assert(!isStringValueQuoted() && "String value is quoted");
> -    return Range.drop_front(StringOffset);
> +    return HasStringValue ? StringRef(StringValue)
> +                          : Range.drop_front(StringOffset);
>    }
>
> -  /// Unescapes the token's string value.
> -  ///
> -  /// Expects the string value to be quoted.
> -  void unescapeQuotedStringValue(std::string &Str) const;
> -
>    const APSInt &integerValue() const { return IntVal; }
>
>    bool hasIntegerValue() const {
>
> Modified: llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=244068&r1=244067&r2=244068&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp Wed Aug  5 12:35:55 2015
> @@ -37,22 +37,6 @@ using namespace llvm;
>
>  namespace {
>
> -struct StringValueUtility {
> -  StringRef String;
> -  std::string UnescapedString;
> -
> -  StringValueUtility(const MIToken &Token) {
> -    if (Token.isStringValueQuoted()) {
> -      Token.unescapeQuotedStringValue(UnescapedString);
> -      String = UnescapedString;
> -      return;
> -    }
> -    String = Token.stringValue();
> -  }
> -
> -  operator StringRef() const { return String; }
> -};
> -
>  /// A wrapper struct around the 'MachineOperand' struct that includes a
> source
>  /// range.
>  struct MachineOperandWithLocation {
> @@ -651,11 +635,9 @@ bool MIParser::parseFixedStackObjectOper
>
>  bool MIParser::parseGlobalValue(GlobalValue *&GV) {
>    switch (Token.kind()) {
> -  case MIToken::NamedGlobalValue:
> -  case MIToken::QuotedNamedGlobalValue: {
> -    StringValueUtility Name(Token);
> +  case MIToken::NamedGlobalValue: {
>      const Module *M = MF.getFunction()->getParent();
> -    GV = M->getNamedValue(Name);
> +    GV = M->getNamedValue(Token.stringValue());
>      if (!GV)
>        return error(Twine("use of undefined global value '@") +
>                     Token.rawStringValue() + "'");
> @@ -716,10 +698,8 @@ bool MIParser::parseJumpTableIndexOperan
>  }
>
>  bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
> -  assert(Token.is(MIToken::ExternalSymbol) ||
> -         Token.is(MIToken::QuotedExternalSymbol));
> -  StringValueUtility Name(Token);
> -  const char *Symbol = MF.createExternalSymbolName(Name);
> +  assert(Token.is(MIToken::ExternalSymbol));
> +  const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
>    lex();
>    // TODO: Parse the target flags.
>    Dest = MachineOperand::CreateES(Symbol);
> @@ -823,10 +803,9 @@ bool MIParser::parseCFIOperand(MachineOp
>
>  bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
>    switch (Token.kind()) {
> -  case MIToken::NamedIRBlock:
> -  case MIToken::QuotedNamedIRBlock: {
> -    StringValueUtility Name(Token);
> -    BB =
> dyn_cast_or_null<BasicBlock>(F.getValueSymbolTable().lookup(Name));
> +  case MIToken::NamedIRBlock: {
> +    BB = dyn_cast_or_null<BasicBlock>(
> +        F.getValueSymbolTable().lookup(Token.stringValue()));
>      if (!BB)
>        return error(Twine("use of undefined IR block '%ir-block.") +
>                     Token.rawStringValue() + "'");
> @@ -854,8 +833,7 @@ bool MIParser::parseBlockAddressOperand(
>    if (expectAndConsume(MIToken::lparen))
>      return true;
>    if (Token.isNot(MIToken::GlobalValue) &&
> -      Token.isNot(MIToken::NamedGlobalValue) &&
> -      Token.isNot(MIToken::QuotedNamedGlobalValue))
> +      Token.isNot(MIToken::NamedGlobalValue))
>      return error("expected a global value");
>    GlobalValue *GV = nullptr;
>    if (parseGlobalValue(GV))
> @@ -867,8 +845,7 @@ bool MIParser::parseBlockAddressOperand(
>    if (expectAndConsume(MIToken::comma))
>      return true;
>    BasicBlock *BB = nullptr;
> -  if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)
> &&
> -      Token.isNot(MIToken::QuotedNamedIRBlock))
> +  if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
>      return error("expected an IR block reference");
>    if (parseIRBlock(BB, *F))
>      return true;
> @@ -926,14 +903,12 @@ bool MIParser::parseMachineOperand(Machi
>      return parseFixedStackObjectOperand(Dest);
>    case MIToken::GlobalValue:
>    case MIToken::NamedGlobalValue:
> -  case MIToken::QuotedNamedGlobalValue:
>      return parseGlobalAddressOperand(Dest);
>    case MIToken::ConstantPoolItem:
>      return parseConstantPoolIndexOperand(Dest);
>    case MIToken::JumpTableIndex:
>      return parseJumpTableIndexOperand(Dest);
>    case MIToken::ExternalSymbol:
> -  case MIToken::QuotedExternalSymbol:
>      return parseExternalSymbolOperand(Dest);
>    case MIToken::exclaim:
>      return parseMetadataOperand(Dest);
> @@ -964,10 +939,8 @@ bool MIParser::parseMachineOperand(Machi
>
>  bool MIParser::parseIRValue(Value *&V) {
>    switch (Token.kind()) {
> -  case MIToken::NamedIRValue:
> -  case MIToken::QuotedNamedIRValue: {
> -    StringValueUtility Name(Token);
> -    V = MF.getFunction()->getValueSymbolTable().lookup(Name);
> +  case MIToken::NamedIRValue: {
> +    V =
> MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
>      if (!V)
>        return error(Twine("use of undefined IR value '%ir.") +
>                     Token.rawStringValue() + "'");
> @@ -1032,8 +1005,7 @@ bool MIParser::parseMachineMemoryOperand
>    lex();
>
>    // TODO: Parse pseudo source values.
> -  if (Token.isNot(MIToken::NamedIRValue) &&
> -      Token.isNot(MIToken::QuotedNamedIRValue))
> +  if (Token.isNot(MIToken::NamedIRValue))
>      return error("expected an IR value reference");
>    Value *V = nullptr;
>    if (parseIRValue(V))
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150805/55c0736c/attachment.html>


More information about the llvm-commits mailing list