[llvm] r244068 - MIR Parser: Simplify the handling of quoted tokens. NFC.
Sean Silva
chisophugis at gmail.com
Wed Aug 5 16:35:12 PDT 2015
On Wed, Aug 5, 2015 at 10:35 AM, Alex Lorenz <arphaman at gmail.com> wrote:
> Author: arphaman
> Date: Wed Aug 5 12:35:55 2015
> New Revision: 244068
>
> URL: http://llvm.org/viewvc/llvm-project?rev=244068&view=rev
> Log:
> MIR Parser: Simplify the handling of quoted tokens. NFC.
>
> The machine instructions lexer should not expose the difference between
> quoted
> and unquoted tokens to the parser.
>
> Modified:
> llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
> llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
> llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
>
> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=244068&r1=244067&r2=244068&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp Wed Aug 5 12:35:55 2015
> @@ -69,13 +69,14 @@ static bool isIdentifierChar(char C) {
> C == '$';
> }
>
> -void MIToken::unescapeQuotedStringValue(std::string &Str) const {
> - assert(isStringValueQuoted() && "String value isn't quoted");
> - StringRef Value = Range.drop_front(StringOffset);
> +/// Unescapes the given string value.
> +///
> +/// Expects the string value to be quoted.
> +static std::string unescapeQuotedString(StringRef Value) {
> assert(Value.front() == '"' && Value.back() == '"');
> Cursor C = Cursor(Value.substr(1, Value.size() - 2));
>
> - Str.clear();
> + std::string Str;
> Str.reserve(C.remaining().size());
> while (!C.isEOF()) {
> char Char = C.peek();
> @@ -95,6 +96,7 @@ void MIToken::unescapeQuotedStringValue(
> Str += Char;
> C.advance();
> }
> + return Str;
> }
>
> /// Lex a string constant using the following regular expression:
> \"[^\"]*\"
> @@ -115,14 +117,16 @@ static Cursor lexStringConstant(
> }
>
> static Cursor lexName(
> - Cursor C, MIToken &Token, MIToken::TokenKind Type,
> - MIToken::TokenKind QuotedType, unsigned PrefixLength,
> + Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned
> PrefixLength,
> function_ref<void(StringRef::iterator Loc, const Twine &)>
> ErrorCallback) {
> auto Range = C;
> C.advance(PrefixLength);
> if (C.peek() == '"') {
> if (Cursor R = lexStringConstant(C, ErrorCallback)) {
> - Token = MIToken(QuotedType, Range.upto(R), PrefixLength);
> + StringRef String = Range.upto(R);
> + Token = MIToken(Type, String,
> +
> unescapeQuotedString(String.drop_front(PrefixLength)),
> + PrefixLength);
> return R;
> }
> Token = MIToken(MIToken::Error, Range.remaining());
> @@ -257,8 +261,7 @@ static Cursor maybeLexIRBlock(
> return None;
> if (isdigit(C.peek(Rule.size())))
> return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
> - return lexName(C, Token, MIToken::NamedIRBlock,
> MIToken::QuotedNamedIRBlock,
> - Rule.size(), ErrorCallback);
> + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(),
> ErrorCallback);
> }
>
> static Cursor maybeLexIRValue(
> @@ -267,8 +270,7 @@ static Cursor maybeLexIRValue(
> const StringRef Rule = "%ir.";
> if (!C.remaining().startswith(Rule))
> return None;
> - return lexName(C, Token, MIToken::NamedIRValue,
> MIToken::QuotedNamedIRValue,
> - Rule.size(), ErrorCallback);
> + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(),
> ErrorCallback);
> }
>
> static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
> @@ -302,8 +304,7 @@ static Cursor maybeLexGlobalValue(
> if (C.peek() != '@')
> return None;
> if (!isdigit(C.peek(1)))
> - return lexName(C, Token, MIToken::NamedGlobalValue,
> - MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1,
> + return lexName(C, Token, MIToken::NamedGlobalValue,
> /*PrefixLength=*/1,
> ErrorCallback);
> auto Range = C;
> C.advance(1); // Skip the '@'
> @@ -320,9 +321,8 @@ static Cursor maybeLexExternalSymbol(
> function_ref<void(StringRef::iterator Loc, const Twine &)>
> ErrorCallback) {
> if (C.peek() != '$')
> return None;
> - return lexName(C, Token, MIToken::ExternalSymbol,
> - MIToken::QuotedExternalSymbol,
> - /*PrefixLength=*/1, ErrorCallback);
> + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
> + ErrorCallback);
> }
>
> static bool isValidHexFloatingPointPrefix(char C) {
>
> Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=244068&r1=244067&r2=244068&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MIRParser/MILexer.h (original)
> +++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.h Wed Aug 5 12:35:55 2015
> @@ -70,10 +70,8 @@ struct MIToken {
> StackObject,
> FixedStackObject,
> NamedGlobalValue,
> - QuotedNamedGlobalValue,
> GlobalValue,
> ExternalSymbol,
> - QuotedExternalSymbol,
>
> // Other tokens
> IntegerLiteral,
> @@ -82,25 +80,32 @@ struct MIToken {
> ConstantPoolItem,
> JumpTableIndex,
> NamedIRBlock,
> - QuotedNamedIRBlock,
> IRBlock,
> NamedIRValue,
> - QuotedNamedIRValue,
> };
>
> private:
> TokenKind Kind;
> unsigned StringOffset;
> + bool HasStringValue;
> StringRef Range;
> + std::string StringValue;
>
This seems really complicated to me (having to juggle StringOffset and a
bool about whether to use the offset). Can we just have a `StringRef
StrVal;` which either points into the source or to the owning buffer inside
the token? Then `stringValue()` is just `return StrVal;`.
> APSInt IntVal;
>
> public:
> MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
> - : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
> + : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
> + Range(Range) {}
> +
> + MIToken(TokenKind Kind, StringRef Range, std::string StringValue,
> + unsigned StringOffset = 0)
> + : Kind(Kind), StringOffset(StringOffset), HasStringValue(true),
> + Range(Range), StringValue(std::move(StringValue)) {}
>
> MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
> unsigned StringOffset = 0)
> - : Kind(Kind), StringOffset(StringOffset), Range(Range),
> IntVal(IntVal) {}
> + : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),
> + Range(Range), IntVal(IntVal) {}
>
> TokenKind kind() const { return Kind; }
>
> @@ -124,30 +129,18 @@ public:
>
> StringRef::iterator location() const { return Range.begin(); }
>
> - bool isStringValueQuoted() const {
> - return Kind == QuotedNamedGlobalValue || Kind == QuotedExternalSymbol
> ||
> - Kind == QuotedNamedIRBlock || Kind == QuotedNamedIRValue;
> - }
> -
> /// Return the token's raw string value.
> ///
> /// If the string value is quoted, this method returns that quoted
> string as
> /// it is, without unescaping the string value.
> StringRef rawStringValue() const { return
> Range.drop_front(StringOffset); }
>
rawStringValue shouldn't be needed anymore, right?
-- Sean Silva
>
> - /// Return token's string value.
> - ///
> - /// Expects the string value to be unquoted.
> + /// Return the token's string value.
> StringRef stringValue() const {
> - assert(!isStringValueQuoted() && "String value is quoted");
> - return Range.drop_front(StringOffset);
> + return HasStringValue ? StringRef(StringValue)
> + : Range.drop_front(StringOffset);
> }
>
> - /// Unescapes the token's string value.
> - ///
> - /// Expects the string value to be quoted.
> - void unescapeQuotedStringValue(std::string &Str) const;
> -
> const APSInt &integerValue() const { return IntVal; }
>
> bool hasIntegerValue() const {
>
> Modified: llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=244068&r1=244067&r2=244068&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp Wed Aug 5 12:35:55 2015
> @@ -37,22 +37,6 @@ using namespace llvm;
>
> namespace {
>
> -struct StringValueUtility {
> - StringRef String;
> - std::string UnescapedString;
> -
> - StringValueUtility(const MIToken &Token) {
> - if (Token.isStringValueQuoted()) {
> - Token.unescapeQuotedStringValue(UnescapedString);
> - String = UnescapedString;
> - return;
> - }
> - String = Token.stringValue();
> - }
> -
> - operator StringRef() const { return String; }
> -};
> -
> /// A wrapper struct around the 'MachineOperand' struct that includes a
> source
> /// range.
> struct MachineOperandWithLocation {
> @@ -651,11 +635,9 @@ bool MIParser::parseFixedStackObjectOper
>
> bool MIParser::parseGlobalValue(GlobalValue *&GV) {
> switch (Token.kind()) {
> - case MIToken::NamedGlobalValue:
> - case MIToken::QuotedNamedGlobalValue: {
> - StringValueUtility Name(Token);
> + case MIToken::NamedGlobalValue: {
> const Module *M = MF.getFunction()->getParent();
> - GV = M->getNamedValue(Name);
> + GV = M->getNamedValue(Token.stringValue());
> if (!GV)
> return error(Twine("use of undefined global value '@") +
> Token.rawStringValue() + "'");
> @@ -716,10 +698,8 @@ bool MIParser::parseJumpTableIndexOperan
> }
>
> bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
> - assert(Token.is(MIToken::ExternalSymbol) ||
> - Token.is(MIToken::QuotedExternalSymbol));
> - StringValueUtility Name(Token);
> - const char *Symbol = MF.createExternalSymbolName(Name);
> + assert(Token.is(MIToken::ExternalSymbol));
> + const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
> lex();
> // TODO: Parse the target flags.
> Dest = MachineOperand::CreateES(Symbol);
> @@ -823,10 +803,9 @@ bool MIParser::parseCFIOperand(MachineOp
>
> bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
> switch (Token.kind()) {
> - case MIToken::NamedIRBlock:
> - case MIToken::QuotedNamedIRBlock: {
> - StringValueUtility Name(Token);
> - BB =
> dyn_cast_or_null<BasicBlock>(F.getValueSymbolTable().lookup(Name));
> + case MIToken::NamedIRBlock: {
> + BB = dyn_cast_or_null<BasicBlock>(
> + F.getValueSymbolTable().lookup(Token.stringValue()));
> if (!BB)
> return error(Twine("use of undefined IR block '%ir-block.") +
> Token.rawStringValue() + "'");
> @@ -854,8 +833,7 @@ bool MIParser::parseBlockAddressOperand(
> if (expectAndConsume(MIToken::lparen))
> return true;
> if (Token.isNot(MIToken::GlobalValue) &&
> - Token.isNot(MIToken::NamedGlobalValue) &&
> - Token.isNot(MIToken::QuotedNamedGlobalValue))
> + Token.isNot(MIToken::NamedGlobalValue))
> return error("expected a global value");
> GlobalValue *GV = nullptr;
> if (parseGlobalValue(GV))
> @@ -867,8 +845,7 @@ bool MIParser::parseBlockAddressOperand(
> if (expectAndConsume(MIToken::comma))
> return true;
> BasicBlock *BB = nullptr;
> - if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)
> &&
> - Token.isNot(MIToken::QuotedNamedIRBlock))
> + if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
> return error("expected an IR block reference");
> if (parseIRBlock(BB, *F))
> return true;
> @@ -926,14 +903,12 @@ bool MIParser::parseMachineOperand(Machi
> return parseFixedStackObjectOperand(Dest);
> case MIToken::GlobalValue:
> case MIToken::NamedGlobalValue:
> - case MIToken::QuotedNamedGlobalValue:
> return parseGlobalAddressOperand(Dest);
> case MIToken::ConstantPoolItem:
> return parseConstantPoolIndexOperand(Dest);
> case MIToken::JumpTableIndex:
> return parseJumpTableIndexOperand(Dest);
> case MIToken::ExternalSymbol:
> - case MIToken::QuotedExternalSymbol:
> return parseExternalSymbolOperand(Dest);
> case MIToken::exclaim:
> return parseMetadataOperand(Dest);
> @@ -964,10 +939,8 @@ bool MIParser::parseMachineOperand(Machi
>
> bool MIParser::parseIRValue(Value *&V) {
> switch (Token.kind()) {
> - case MIToken::NamedIRValue:
> - case MIToken::QuotedNamedIRValue: {
> - StringValueUtility Name(Token);
> - V = MF.getFunction()->getValueSymbolTable().lookup(Name);
> + case MIToken::NamedIRValue: {
> + V =
> MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
> if (!V)
> return error(Twine("use of undefined IR value '%ir.") +
> Token.rawStringValue() + "'");
> @@ -1032,8 +1005,7 @@ bool MIParser::parseMachineMemoryOperand
> lex();
>
> // TODO: Parse pseudo source values.
> - if (Token.isNot(MIToken::NamedIRValue) &&
> - Token.isNot(MIToken::QuotedNamedIRValue))
> + if (Token.isNot(MIToken::NamedIRValue))
> return error("expected an IR value reference");
> Value *V = nullptr;
> if (parseIRValue(V))
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150805/55c0736c/attachment.html>
More information about the llvm-commits
mailing list