[llvm] r244068 - MIR Parser: Simplify the handling of quoted tokens. NFC.
Alex L
arphaman at gmail.com
Wed Aug 5 16:45:23 PDT 2015
2015-08-05 16:35 GMT-07:00 Sean Silva <chisophugis at gmail.com>:
>
>
> On Wed, Aug 5, 2015 at 10:35 AM, Alex Lorenz <arphaman at gmail.com> wrote:
>
>> Author: arphaman
>> Date: Wed Aug 5 12:35:55 2015
>> New Revision: 244068
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=244068&view=rev
>> Log:
>> MIR Parser: Simplify the handling of quoted tokens. NFC.
>>
>> The machine instructions lexer should not expose the difference between
>> quoted
>> and unquoted tokens to the parser.
>>
>> Modified:
>> llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
>> llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
>> llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
>>
>> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=244068&r1=244067&r2=244068&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp Wed Aug 5 12:35:55 2015
>> @@ -69,13 +69,14 @@ static bool isIdentifierChar(char C) {
>> C == '$';
>> }
>>
>> -void MIToken::unescapeQuotedStringValue(std::string &Str) const {
>> - assert(isStringValueQuoted() && "String value isn't quoted");
>> - StringRef Value = Range.drop_front(StringOffset);
>> +/// Unescapes the given string value.
>> +///
>> +/// Expects the string value to be quoted.
>> +static std::string unescapeQuotedString(StringRef Value) {
>> assert(Value.front() == '"' && Value.back() == '"');
>> Cursor C = Cursor(Value.substr(1, Value.size() - 2));
>>
>> - Str.clear();
>> + std::string Str;
>> Str.reserve(C.remaining().size());
>> while (!C.isEOF()) {
>> char Char = C.peek();
>> @@ -95,6 +96,7 @@ void MIToken::unescapeQuotedStringValue(
>> Str += Char;
>> C.advance();
>> }
>> + return Str;
>> }
>>
>> /// Lex a string constant using the following regular expression:
>> \"[^\"]*\"
>> @@ -115,14 +117,16 @@ static Cursor lexStringConstant(
>> }
>>
>> static Cursor lexName(
>> - Cursor C, MIToken &Token, MIToken::TokenKind Type,
>> - MIToken::TokenKind QuotedType, unsigned PrefixLength,
>> + Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned
>> PrefixLength,
>> function_ref<void(StringRef::iterator Loc, const Twine &)>
>> ErrorCallback) {
>> auto Range = C;
>> C.advance(PrefixLength);
>> if (C.peek() == '"') {
>> if (Cursor R = lexStringConstant(C, ErrorCallback)) {
>> - Token = MIToken(QuotedType, Range.upto(R), PrefixLength);
>> + StringRef String = Range.upto(R);
>> + Token = MIToken(Type, String,
>> +
>> unescapeQuotedString(String.drop_front(PrefixLength)),
>> + PrefixLength);
>> return R;
>> }
>> Token = MIToken(MIToken::Error, Range.remaining());
>> @@ -257,8 +261,7 @@ static Cursor maybeLexIRBlock(
>> return None;
>> if (isdigit(C.peek(Rule.size())))
>> return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
>> - return lexName(C, Token, MIToken::NamedIRBlock,
>> MIToken::QuotedNamedIRBlock,
>> - Rule.size(), ErrorCallback);
>> + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(),
>> ErrorCallback);
>> }
>>
>> static Cursor maybeLexIRValue(
>> @@ -267,8 +270,7 @@ static Cursor maybeLexIRValue(
>> const StringRef Rule = "%ir.";
>> if (!C.remaining().startswith(Rule))
>> return None;
>> - return lexName(C, Token, MIToken::NamedIRValue,
>> MIToken::QuotedNamedIRValue,
>> - Rule.size(), ErrorCallback);
>> + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(),
>> ErrorCallback);
>> }
>>
>> static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
>> @@ -302,8 +304,7 @@ static Cursor maybeLexGlobalValue(
>> if (C.peek() != '@')
>> return None;
>> if (!isdigit(C.peek(1)))
>> - return lexName(C, Token, MIToken::NamedGlobalValue,
>> - MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1,
>> + return lexName(C, Token, MIToken::NamedGlobalValue,
>> /*PrefixLength=*/1,
>> ErrorCallback);
>> auto Range = C;
>> C.advance(1); // Skip the '@'
>> @@ -320,9 +321,8 @@ static Cursor maybeLexExternalSymbol(
>> function_ref<void(StringRef::iterator Loc, const Twine &)>
>> ErrorCallback) {
>> if (C.peek() != '$')
>> return None;
>> - return lexName(C, Token, MIToken::ExternalSymbol,
>> - MIToken::QuotedExternalSymbol,
>> - /*PrefixLength=*/1, ErrorCallback);
>> + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
>> + ErrorCallback);
>> }
>>
>> static bool isValidHexFloatingPointPrefix(char C) {
>>
>> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=244068&r1=244067&r2=244068&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.h (original)
>> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.h Wed Aug 5 12:35:55 2015
>> @@ -70,10 +70,8 @@ struct MIToken {
>> StackObject,
>> FixedStackObject,
>> NamedGlobalValue,
>> - QuotedNamedGlobalValue,
>> GlobalValue,
>> ExternalSymbol,
>> - QuotedExternalSymbol,
>>
>> // Other tokens
>> IntegerLiteral,
>> @@ -82,25 +80,32 @@ struct MIToken {
>> ConstantPoolItem,
>> JumpTableIndex,
>> NamedIRBlock,
>> - QuotedNamedIRBlock,
>> IRBlock,
>> NamedIRValue,
>> - QuotedNamedIRValue,
>> };
>>
>> private:
>> TokenKind Kind;
>> unsigned StringOffset;
>> + bool HasStringValue;
>> StringRef Range;
>> + std::string StringValue;
>>
>
> This seems really complicated to me (having to juggle StringOffset and a
> bool about whether to use the offset). Can we just have a `StringRef
> StrVal;` which either points into the source or to the owning buffer inside
> the token? Then `stringValue()` is just `return StrVal;`.
>
Sure, I can do that. I just used one StringRef because I wanted to keep
the source range and string value in one string ref, but I can split them up
and simplify the logic in the stringValue method.
>
>
>
>> APSInt IntVal;
>>
>> public:
>> MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
>> - : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
>> + : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
>> + Range(Range) {}
>> +
>> + MIToken(TokenKind Kind, StringRef Range, std::string StringValue,
>> + unsigned StringOffset = 0)
>> + : Kind(Kind), StringOffset(StringOffset), HasStringValue(true),
>> + Range(Range), StringValue(std::move(StringValue)) {}
>>
>> MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
>> unsigned StringOffset = 0)
>> - : Kind(Kind), StringOffset(StringOffset), Range(Range),
>> IntVal(IntVal) {}
>> + : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
>> + Range(Range), IntVal(IntVal) {}
>>
>> TokenKind kind() const { return Kind; }
>>
>> @@ -124,30 +129,18 @@ public:
>>
>> StringRef::iterator location() const { return Range.begin(); }
>>
>> - bool isStringValueQuoted() const {
>> - return Kind == QuotedNamedGlobalValue || Kind ==
>> QuotedExternalSymbol ||
>> - Kind == QuotedNamedIRBlock || Kind == QuotedNamedIRValue;
>> - }
>> -
>> /// Return the token's raw string value.
>> ///
>> /// If the string value is quoted, this method returns that quoted
>> string as
>> /// it is, without unescaping the string value.
>> StringRef rawStringValue() const { return
>> Range.drop_front(StringOffset); }
>>
>
> rawStringValue shouldn't be needed anymore, right?
>
I use the rawStringValue for some of the error messages - so that
I can print out the full name (including quotes) in the error message.
But I think I can get rid of it if I use the full raw token in the error
message instead of just string value. I'll be able to get the raw token
from the range StringRef.
> -- Sean Silva
>
>
>>
>> - /// Return token's string value.
>> - ///
>> - /// Expects the string value to be unquoted.
>> + /// Return the token's string value.
>> StringRef stringValue() const {
>> - assert(!isStringValueQuoted() && "String value is quoted");
>> - return Range.drop_front(StringOffset);
>> + return HasStringValue ? StringRef(StringValue)
>> + : Range.drop_front(StringOffset);
>> }
>>
>> - /// Unescapes the token's string value.
>> - ///
>> - /// Expects the string value to be quoted.
>> - void unescapeQuotedStringValue(std::string &Str) const;
>> -
>> const APSInt &integerValue() const { return IntVal; }
>>
>> bool hasIntegerValue() const {
>>
>> Modified: llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=244068&r1=244067&r2=244068&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp Wed Aug 5 12:35:55 2015
>> @@ -37,22 +37,6 @@ using namespace llvm;
>>
>> namespace {
>>
>> -struct StringValueUtility {
>> - StringRef String;
>> - std::string UnescapedString;
>> -
>> - StringValueUtility(const MIToken &Token) {
>> - if (Token.isStringValueQuoted()) {
>> - Token.unescapeQuotedStringValue(UnescapedString);
>> - String = UnescapedString;
>> - return;
>> - }
>> - String = Token.stringValue();
>> - }
>> -
>> - operator StringRef() const { return String; }
>> -};
>> -
>> /// A wrapper struct around the 'MachineOperand' struct that includes a
>> source
>> /// range.
>> struct MachineOperandWithLocation {
>> @@ -651,11 +635,9 @@ bool MIParser::parseFixedStackObjectOper
>>
>> bool MIParser::parseGlobalValue(GlobalValue *&GV) {
>> switch (Token.kind()) {
>> - case MIToken::NamedGlobalValue:
>> - case MIToken::QuotedNamedGlobalValue: {
>> - StringValueUtility Name(Token);
>> + case MIToken::NamedGlobalValue: {
>> const Module *M = MF.getFunction()->getParent();
>> - GV = M->getNamedValue(Name);
>> + GV = M->getNamedValue(Token.stringValue());
>> if (!GV)
>> return error(Twine("use of undefined global value '@") +
>> Token.rawStringValue() + "'");
>> @@ -716,10 +698,8 @@ bool MIParser::parseJumpTableIndexOperan
>> }
>>
>> bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
>> - assert(Token.is(MIToken::ExternalSymbol) ||
>> - Token.is(MIToken::QuotedExternalSymbol));
>> - StringValueUtility Name(Token);
>> - const char *Symbol = MF.createExternalSymbolName(Name);
>> + assert(Token.is(MIToken::ExternalSymbol));
>> + const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
>> lex();
>> // TODO: Parse the target flags.
>> Dest = MachineOperand::CreateES(Symbol);
>> @@ -823,10 +803,9 @@ bool MIParser::parseCFIOperand(MachineOp
>>
>> bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
>> switch (Token.kind()) {
>> - case MIToken::NamedIRBlock:
>> - case MIToken::QuotedNamedIRBlock: {
>> - StringValueUtility Name(Token);
>> - BB =
>> dyn_cast_or_null<BasicBlock>(F.getValueSymbolTable().lookup(Name));
>> + case MIToken::NamedIRBlock: {
>> + BB = dyn_cast_or_null<BasicBlock>(
>> + F.getValueSymbolTable().lookup(Token.stringValue()));
>> if (!BB)
>> return error(Twine("use of undefined IR block '%ir-block.") +
>> Token.rawStringValue() + "'");
>> @@ -854,8 +833,7 @@ bool MIParser::parseBlockAddressOperand(
>> if (expectAndConsume(MIToken::lparen))
>> return true;
>> if (Token.isNot(MIToken::GlobalValue) &&
>> - Token.isNot(MIToken::NamedGlobalValue) &&
>> - Token.isNot(MIToken::QuotedNamedGlobalValue))
>> + Token.isNot(MIToken::NamedGlobalValue))
>> return error("expected a global value");
>> GlobalValue *GV = nullptr;
>> if (parseGlobalValue(GV))
>> @@ -867,8 +845,7 @@ bool MIParser::parseBlockAddressOperand(
>> if (expectAndConsume(MIToken::comma))
>> return true;
>> BasicBlock *BB = nullptr;
>> - if (Token.isNot(MIToken::IRBlock) &&
>> Token.isNot(MIToken::NamedIRBlock) &&
>> - Token.isNot(MIToken::QuotedNamedIRBlock))
>> + if (Token.isNot(MIToken::IRBlock) &&
>> Token.isNot(MIToken::NamedIRBlock))
>> return error("expected an IR block reference");
>> if (parseIRBlock(BB, *F))
>> return true;
>> @@ -926,14 +903,12 @@ bool MIParser::parseMachineOperand(Machi
>> return parseFixedStackObjectOperand(Dest);
>> case MIToken::GlobalValue:
>> case MIToken::NamedGlobalValue:
>> - case MIToken::QuotedNamedGlobalValue:
>> return parseGlobalAddressOperand(Dest);
>> case MIToken::ConstantPoolItem:
>> return parseConstantPoolIndexOperand(Dest);
>> case MIToken::JumpTableIndex:
>> return parseJumpTableIndexOperand(Dest);
>> case MIToken::ExternalSymbol:
>> - case MIToken::QuotedExternalSymbol:
>> return parseExternalSymbolOperand(Dest);
>> case MIToken::exclaim:
>> return parseMetadataOperand(Dest);
>> @@ -964,10 +939,8 @@ bool MIParser::parseMachineOperand(Machi
>>
>> bool MIParser::parseIRValue(Value *&V) {
>> switch (Token.kind()) {
>> - case MIToken::NamedIRValue:
>> - case MIToken::QuotedNamedIRValue: {
>> - StringValueUtility Name(Token);
>> - V = MF.getFunction()->getValueSymbolTable().lookup(Name);
>> + case MIToken::NamedIRValue: {
>> + V =
>> MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
>> if (!V)
>> return error(Twine("use of undefined IR value '%ir.") +
>> Token.rawStringValue() + "'");
>> @@ -1032,8 +1005,7 @@ bool MIParser::parseMachineMemoryOperand
>> lex();
>>
>> // TODO: Parse pseudo source values.
>> - if (Token.isNot(MIToken::NamedIRValue) &&
>> - Token.isNot(MIToken::QuotedNamedIRValue))
>> + if (Token.isNot(MIToken::NamedIRValue))
>> return error("expected an IR value reference");
>> Value *V = nullptr;
>> if (parseIRValue(V))
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150805/1228c8df/attachment.html>
More information about the llvm-commits
mailing list