[llvm] r244068 - MIR Parser: Simplify the handling of quoted tokens. NFC.

Alex L arphaman at gmail.com
Wed Aug 5 16:45:23 PDT 2015


2015-08-05 16:35 GMT-07:00 Sean Silva <chisophugis at gmail.com>:

>
>
> On Wed, Aug 5, 2015 at 10:35 AM, Alex Lorenz <arphaman at gmail.com> wrote:
>
>> Author: arphaman
>> Date: Wed Aug  5 12:35:55 2015
>> New Revision: 244068
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=244068&view=rev
>> Log:
>> MIR Parser: Simplify the handling of quoted tokens. NFC.
>>
>> The machine instructions lexer should not expose the difference between
>> quoted
>> and unquoted tokens to the parser.
>>
>> Modified:
>>     llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
>>     llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
>>     llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
>>
>> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=244068&r1=244067&r2=244068&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp Wed Aug  5 12:35:55 2015
>> @@ -69,13 +69,14 @@ static bool isIdentifierChar(char C) {
>>           C == '$';
>>  }
>>
>> -void MIToken::unescapeQuotedStringValue(std::string &Str) const {
>> -  assert(isStringValueQuoted() && "String value isn't quoted");
>> -  StringRef Value = Range.drop_front(StringOffset);
>> +/// Unescapes the given string value.
>> +///
>> +/// Expects the string value to be quoted.
>> +static std::string unescapeQuotedString(StringRef Value) {
>>    assert(Value.front() == '"' && Value.back() == '"');
>>    Cursor C = Cursor(Value.substr(1, Value.size() - 2));
>>
>> -  Str.clear();
>> +  std::string Str;
>>    Str.reserve(C.remaining().size());
>>    while (!C.isEOF()) {
>>      char Char = C.peek();
>> @@ -95,6 +96,7 @@ void MIToken::unescapeQuotedStringValue(
>>      Str += Char;
>>      C.advance();
>>    }
>> +  return Str;
>>  }
>>
>>  /// Lex a string constant using the following regular expression:
>> \"[^\"]*\"
>> @@ -115,14 +117,16 @@ static Cursor lexStringConstant(
>>  }
>>
>>  static Cursor lexName(
>> -    Cursor C, MIToken &Token, MIToken::TokenKind Type,
>> -    MIToken::TokenKind QuotedType, unsigned PrefixLength,
>> +    Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned
>> PrefixLength,
>>      function_ref<void(StringRef::iterator Loc, const Twine &)>
>> ErrorCallback) {
>>    auto Range = C;
>>    C.advance(PrefixLength);
>>    if (C.peek() == '"') {
>>      if (Cursor R = lexStringConstant(C, ErrorCallback)) {
>> -      Token = MIToken(QuotedType, Range.upto(R), PrefixLength);
>> +      StringRef String = Range.upto(R);
>> +      Token = MIToken(Type, String,
>> +
>> unescapeQuotedString(String.drop_front(PrefixLength)),
>> +                      PrefixLength);
>>        return R;
>>      }
>>      Token = MIToken(MIToken::Error, Range.remaining());
>> @@ -257,8 +261,7 @@ static Cursor maybeLexIRBlock(
>>      return None;
>>    if (isdigit(C.peek(Rule.size())))
>>      return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
>> -  return lexName(C, Token, MIToken::NamedIRBlock,
>> MIToken::QuotedNamedIRBlock,
>> -                 Rule.size(), ErrorCallback);
>> +  return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(),
>> ErrorCallback);
>>  }
>>
>>  static Cursor maybeLexIRValue(
>> @@ -267,8 +270,7 @@ static Cursor maybeLexIRValue(
>>    const StringRef Rule = "%ir.";
>>    if (!C.remaining().startswith(Rule))
>>      return None;
>> -  return lexName(C, Token, MIToken::NamedIRValue,
>> MIToken::QuotedNamedIRValue,
>> -                 Rule.size(), ErrorCallback);
>> +  return lexName(C, Token, MIToken::NamedIRValue, Rule.size(),
>> ErrorCallback);
>>  }
>>
>>  static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
>> @@ -302,8 +304,7 @@ static Cursor maybeLexGlobalValue(
>>    if (C.peek() != '@')
>>      return None;
>>    if (!isdigit(C.peek(1)))
>> -    return lexName(C, Token, MIToken::NamedGlobalValue,
>> -                   MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1,
>> +    return lexName(C, Token, MIToken::NamedGlobalValue,
>> /*PrefixLength=*/1,
>>                     ErrorCallback);
>>    auto Range = C;
>>    C.advance(1); // Skip the '@'
>> @@ -320,9 +321,8 @@ static Cursor maybeLexExternalSymbol(
>>      function_ref<void(StringRef::iterator Loc, const Twine &)>
>> ErrorCallback) {
>>    if (C.peek() != '$')
>>      return None;
>> -  return lexName(C, Token, MIToken::ExternalSymbol,
>> -                 MIToken::QuotedExternalSymbol,
>> -                 /*PrefixLength=*/1, ErrorCallback);
>> +  return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
>> +                 ErrorCallback);
>>  }
>>
>>  static bool isValidHexFloatingPointPrefix(char C) {
>>
>> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=244068&r1=244067&r2=244068&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.h (original)
>> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.h Wed Aug  5 12:35:55 2015
>> @@ -70,10 +70,8 @@ struct MIToken {
>>      StackObject,
>>      FixedStackObject,
>>      NamedGlobalValue,
>> -    QuotedNamedGlobalValue,
>>      GlobalValue,
>>      ExternalSymbol,
>> -    QuotedExternalSymbol,
>>
>>      // Other tokens
>>      IntegerLiteral,
>> @@ -82,25 +80,32 @@ struct MIToken {
>>      ConstantPoolItem,
>>      JumpTableIndex,
>>      NamedIRBlock,
>> -    QuotedNamedIRBlock,
>>      IRBlock,
>>      NamedIRValue,
>> -    QuotedNamedIRValue,
>>    };
>>
>>  private:
>>    TokenKind Kind;
>>    unsigned StringOffset;
>> +  bool HasStringValue;
>>    StringRef Range;
>> +  std::string StringValue;
>>
>
> This seems really complicated to me (having to juggle StringOffset and a
> bool about whether to use the offset). Can we just have a `StringRef
> StrVal;` which either points into the source or to the owning buffer inside
> the token? Then `stringValue()` is just `return StrVal;`.
>

Sure, I can do that. I just used one StringRef because I wanted to keep
the source range and string value in one string ref, but I can split them up
and simplify the logic in the stringValue method.


>
>
>
>>    APSInt IntVal;
>>
>>  public:
>>    MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
>> -      : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
>> +      : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
>> +        Range(Range) {}
>> +
>> +  MIToken(TokenKind Kind, StringRef Range, std::string StringValue,
>> +          unsigned StringOffset = 0)
>> +      : Kind(Kind), StringOffset(StringOffset), HasStringValue(true),
>> +        Range(Range), StringValue(std::move(StringValue)) {}
>>
>>    MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
>>            unsigned StringOffset = 0)
>> -      : Kind(Kind), StringOffset(StringOffset), Range(Range),
>> IntVal(IntVal) {}
>> +      : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
>> +        Range(Range), IntVal(IntVal) {}
>>
>>    TokenKind kind() const { return Kind; }
>>
>> @@ -124,30 +129,18 @@ public:
>>
>>    StringRef::iterator location() const { return Range.begin(); }
>>
>> -  bool isStringValueQuoted() const {
>> -    return Kind == QuotedNamedGlobalValue || Kind ==
>> QuotedExternalSymbol ||
>> -           Kind == QuotedNamedIRBlock || Kind == QuotedNamedIRValue;
>> -  }
>> -
>>    /// Return the token's raw string value.
>>    ///
>>    /// If the string value is quoted, this method returns that quoted
>> string as
>>    /// it is, without unescaping the string value.
>>    StringRef rawStringValue() const { return
>> Range.drop_front(StringOffset); }
>>
>
> rawStringValue shouldn't be needed anymore, right?
>

I use the rawStringValue for some of the error messages - so that
I can print out the full name (including quotes) in the error message.
But I think I can get rid of it if I use the full raw token in the error
message instead of just string value. I'll be able to get the raw token
from the range StringRef.


> -- Sean Silva
>
>
>>
>> -  /// Return token's string value.
>> -  ///
>> -  /// Expects the string value to be unquoted.
>> +  /// Return the token's string value.
>>    StringRef stringValue() const {
>> -    assert(!isStringValueQuoted() && "String value is quoted");
>> -    return Range.drop_front(StringOffset);
>> +    return HasStringValue ? StringRef(StringValue)
>> +                          : Range.drop_front(StringOffset);
>>    }
>>
>> -  /// Unescapes the token's string value.
>> -  ///
>> -  /// Expects the string value to be quoted.
>> -  void unescapeQuotedStringValue(std::string &Str) const;
>> -
>>    const APSInt &integerValue() const { return IntVal; }
>>
>>    bool hasIntegerValue() const {
>>
>> Modified: llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=244068&r1=244067&r2=244068&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp Wed Aug  5 12:35:55 2015
>> @@ -37,22 +37,6 @@ using namespace llvm;
>>
>>  namespace {
>>
>> -struct StringValueUtility {
>> -  StringRef String;
>> -  std::string UnescapedString;
>> -
>> -  StringValueUtility(const MIToken &Token) {
>> -    if (Token.isStringValueQuoted()) {
>> -      Token.unescapeQuotedStringValue(UnescapedString);
>> -      String = UnescapedString;
>> -      return;
>> -    }
>> -    String = Token.stringValue();
>> -  }
>> -
>> -  operator StringRef() const { return String; }
>> -};
>> -
>>  /// A wrapper struct around the 'MachineOperand' struct that includes a
>> source
>>  /// range.
>>  struct MachineOperandWithLocation {
>> @@ -651,11 +635,9 @@ bool MIParser::parseFixedStackObjectOper
>>
>>  bool MIParser::parseGlobalValue(GlobalValue *&GV) {
>>    switch (Token.kind()) {
>> -  case MIToken::NamedGlobalValue:
>> -  case MIToken::QuotedNamedGlobalValue: {
>> -    StringValueUtility Name(Token);
>> +  case MIToken::NamedGlobalValue: {
>>      const Module *M = MF.getFunction()->getParent();
>> -    GV = M->getNamedValue(Name);
>> +    GV = M->getNamedValue(Token.stringValue());
>>      if (!GV)
>>        return error(Twine("use of undefined global value '@") +
>>                     Token.rawStringValue() + "'");
>> @@ -716,10 +698,8 @@ bool MIParser::parseJumpTableIndexOperan
>>  }
>>
>>  bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
>> -  assert(Token.is(MIToken::ExternalSymbol) ||
>> -         Token.is(MIToken::QuotedExternalSymbol));
>> -  StringValueUtility Name(Token);
>> -  const char *Symbol = MF.createExternalSymbolName(Name);
>> +  assert(Token.is(MIToken::ExternalSymbol));
>> +  const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
>>    lex();
>>    // TODO: Parse the target flags.
>>    Dest = MachineOperand::CreateES(Symbol);
>> @@ -823,10 +803,9 @@ bool MIParser::parseCFIOperand(MachineOp
>>
>>  bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
>>    switch (Token.kind()) {
>> -  case MIToken::NamedIRBlock:
>> -  case MIToken::QuotedNamedIRBlock: {
>> -    StringValueUtility Name(Token);
>> -    BB =
>> dyn_cast_or_null<BasicBlock>(F.getValueSymbolTable().lookup(Name));
>> +  case MIToken::NamedIRBlock: {
>> +    BB = dyn_cast_or_null<BasicBlock>(
>> +        F.getValueSymbolTable().lookup(Token.stringValue()));
>>      if (!BB)
>>        return error(Twine("use of undefined IR block '%ir-block.") +
>>                     Token.rawStringValue() + "'");
>> @@ -854,8 +833,7 @@ bool MIParser::parseBlockAddressOperand(
>>    if (expectAndConsume(MIToken::lparen))
>>      return true;
>>    if (Token.isNot(MIToken::GlobalValue) &&
>> -      Token.isNot(MIToken::NamedGlobalValue) &&
>> -      Token.isNot(MIToken::QuotedNamedGlobalValue))
>> +      Token.isNot(MIToken::NamedGlobalValue))
>>      return error("expected a global value");
>>    GlobalValue *GV = nullptr;
>>    if (parseGlobalValue(GV))
>> @@ -867,8 +845,7 @@ bool MIParser::parseBlockAddressOperand(
>>    if (expectAndConsume(MIToken::comma))
>>      return true;
>>    BasicBlock *BB = nullptr;
>> -  if (Token.isNot(MIToken::IRBlock) &&
>> Token.isNot(MIToken::NamedIRBlock) &&
>> -      Token.isNot(MIToken::QuotedNamedIRBlock))
>> +  if (Token.isNot(MIToken::IRBlock) &&
>> Token.isNot(MIToken::NamedIRBlock))
>>      return error("expected an IR block reference");
>>    if (parseIRBlock(BB, *F))
>>      return true;
>> @@ -926,14 +903,12 @@ bool MIParser::parseMachineOperand(Machi
>>      return parseFixedStackObjectOperand(Dest);
>>    case MIToken::GlobalValue:
>>    case MIToken::NamedGlobalValue:
>> -  case MIToken::QuotedNamedGlobalValue:
>>      return parseGlobalAddressOperand(Dest);
>>    case MIToken::ConstantPoolItem:
>>      return parseConstantPoolIndexOperand(Dest);
>>    case MIToken::JumpTableIndex:
>>      return parseJumpTableIndexOperand(Dest);
>>    case MIToken::ExternalSymbol:
>> -  case MIToken::QuotedExternalSymbol:
>>      return parseExternalSymbolOperand(Dest);
>>    case MIToken::exclaim:
>>      return parseMetadataOperand(Dest);
>> @@ -964,10 +939,8 @@ bool MIParser::parseMachineOperand(Machi
>>
>>  bool MIParser::parseIRValue(Value *&V) {
>>    switch (Token.kind()) {
>> -  case MIToken::NamedIRValue:
>> -  case MIToken::QuotedNamedIRValue: {
>> -    StringValueUtility Name(Token);
>> -    V = MF.getFunction()->getValueSymbolTable().lookup(Name);
>> +  case MIToken::NamedIRValue: {
>> +    V =
>> MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
>>      if (!V)
>>        return error(Twine("use of undefined IR value '%ir.") +
>>                     Token.rawStringValue() + "'");
>> @@ -1032,8 +1005,7 @@ bool MIParser::parseMachineMemoryOperand
>>    lex();
>>
>>    // TODO: Parse pseudo source values.
>> -  if (Token.isNot(MIToken::NamedIRValue) &&
>> -      Token.isNot(MIToken::QuotedNamedIRValue))
>> +  if (Token.isNot(MIToken::NamedIRValue))
>>      return error("expected an IR value reference");
>>    Value *V = nullptr;
>>    if (parseIRValue(V))
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150805/1228c8df/attachment.html>


More information about the llvm-commits mailing list