<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">2015-08-05 16:35 GMT-07:00 Sean Silva <span dir="ltr"><<a href="mailto:chisophugis@gmail.com" target="_blank">chisophugis@gmail.com</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote"><div><div class="h5">On Wed, Aug 5, 2015 at 10:35 AM, Alex Lorenz <span dir="ltr"><<a href="mailto:arphaman@gmail.com" target="_blank">arphaman@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: arphaman<br>
Date: Wed Aug 5 12:35:55 2015<br>
New Revision: 244068<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=244068&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=244068&view=rev</a><br>
Log:<br>
MIR Parser: Simplify the handling of quoted tokens. NFC.<br>
<br>
The machine instructions lexer should not expose the difference between quoted<br>
and unquoted tokens to the parser.<br>
<br>
Modified:<br>
llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp<br>
llvm/trunk/lib/CodeGen/MIRParser/MILexer.h<br>
llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp<br>
<br>
Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=244068&r1=244067&r2=244068&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp?rev=244068&r1=244067&r2=244068&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.cpp Wed Aug 5 12:35:55 2015<br>
@@ -69,13 +69,14 @@ static bool isIdentifierChar(char C) {<br>
C == '$';<br>
}<br>
<br>
-void MIToken::unescapeQuotedStringValue(std::string &Str) const {<br>
- assert(isStringValueQuoted() && "String value isn't quoted");<br>
- StringRef Value = Range.drop_front(StringOffset);<br>
+/// Unescapes the given string value.<br>
+///<br>
+/// Expects the string value to be quoted.<br>
+static std::string unescapeQuotedString(StringRef Value) {<br>
assert(Value.front() == '"' && Value.back() == '"');<br>
Cursor C = Cursor(Value.substr(1, Value.size() - 2));<br>
<br>
- Str.clear();<br>
+ std::string Str;<br>
Str.reserve(C.remaining().size());<br>
while (!C.isEOF()) {<br>
char Char = C.peek();<br>
@@ -95,6 +96,7 @@ void MIToken::unescapeQuotedStringValue(<br>
Str += Char;<br>
C.advance();<br>
}<br>
+ return Str;<br>
}<br>
<br>
/// Lex a string constant using the following regular expression: \"[^\"]*\"<br>
@@ -115,14 +117,16 @@ static Cursor lexStringConstant(<br>
}<br>
<br>
static Cursor lexName(<br>
- Cursor C, MIToken &Token, MIToken::TokenKind Type,<br>
- MIToken::TokenKind QuotedType, unsigned PrefixLength,<br>
+ Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength,<br>
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {<br>
auto Range = C;<br>
C.advance(PrefixLength);<br>
if (C.peek() == '"') {<br>
if (Cursor R = lexStringConstant(C, ErrorCallback)) {<br>
- Token = MIToken(QuotedType, Range.upto(R), PrefixLength);<br>
+ StringRef String = Range.upto(R);<br>
+ Token = MIToken(Type, String,<br>
+ unescapeQuotedString(String.drop_front(PrefixLength)),<br>
+ PrefixLength);<br>
return R;<br>
}<br>
Token = MIToken(MIToken::Error, Range.remaining());<br>
@@ -257,8 +261,7 @@ static Cursor maybeLexIRBlock(<br>
return None;<br>
if (isdigit(C.peek(Rule.size())))<br>
return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);<br>
- return lexName(C, Token, MIToken::NamedIRBlock, MIToken::QuotedNamedIRBlock,<br>
- Rule.size(), ErrorCallback);<br>
+ return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);<br>
}<br>
<br>
static Cursor maybeLexIRValue(<br>
@@ -267,8 +270,7 @@ static Cursor maybeLexIRValue(<br>
const StringRef Rule = "%ir.";<br>
if (!C.remaining().startswith(Rule))<br>
return None;<br>
- return lexName(C, Token, MIToken::NamedIRValue, MIToken::QuotedNamedIRValue,<br>
- Rule.size(), ErrorCallback);<br>
+ return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);<br>
}<br>
<br>
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {<br>
@@ -302,8 +304,7 @@ static Cursor maybeLexGlobalValue(<br>
if (C.peek() != '@')<br>
return None;<br>
if (!isdigit(C.peek(1)))<br>
- return lexName(C, Token, MIToken::NamedGlobalValue,<br>
- MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1,<br>
+ return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,<br>
ErrorCallback);<br>
auto Range = C;<br>
C.advance(1); // Skip the '@'<br>
@@ -320,9 +321,8 @@ static Cursor maybeLexExternalSymbol(<br>
function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {<br>
if (C.peek() != '$')<br>
return None;<br>
- return lexName(C, Token, MIToken::ExternalSymbol,<br>
- MIToken::QuotedExternalSymbol,<br>
- /*PrefixLength=*/1, ErrorCallback);<br>
+ return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,<br>
+ ErrorCallback);<br>
}<br>
<br>
static bool isValidHexFloatingPointPrefix(char C) {<br>
<br>
Modified: llvm/trunk/lib/CodeGen/MIRParser/MILexer.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=244068&r1=244067&r2=244068&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MILexer.h?rev=244068&r1=244067&r2=244068&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/CodeGen/MIRParser/MILexer.h (original)<br>
+++ llvm/trunk/lib/CodeGen/MIRParser/MILexer.h Wed Aug 5 12:35:55 2015<br>
@@ -70,10 +70,8 @@ struct MIToken {<br>
StackObject,<br>
FixedStackObject,<br>
NamedGlobalValue,<br>
- QuotedNamedGlobalValue,<br>
GlobalValue,<br>
ExternalSymbol,<br>
- QuotedExternalSymbol,<br>
<br>
// Other tokens<br>
IntegerLiteral,<br>
@@ -82,25 +80,32 @@ struct MIToken {<br>
ConstantPoolItem,<br>
JumpTableIndex,<br>
NamedIRBlock,<br>
- QuotedNamedIRBlock,<br>
IRBlock,<br>
NamedIRValue,<br>
- QuotedNamedIRValue,<br>
};<br>
<br>
private:<br>
TokenKind Kind;<br>
unsigned StringOffset;<br>
+ bool HasStringValue;<br>
StringRef Range;<br>
+ std::string StringValue;<br></blockquote><div><br></div></div></div><div>This seems really complicated to me (having to juggle StringOffset and a bool about whether to use the offset). Can we just have a `StringRef StrVal;` which either points into the source or to the owning buffer inside the token? Then `stringValue()` is just `return StrVal;`.</div></div></div></div></blockquote><div><br></div><div>Sure, I can do that. I just used one StringRef because I wanted to keep</div><div>the source range and string value in one string ref, but I can split them up</div><div>and simplify the logic in the stringValue method.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><div><div class="h5"><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
APSInt IntVal;<br>
<br>
public:<br>
MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)<br>
- : Kind(Kind), StringOffset(StringOffset), Range(Range) {}<br>
+ : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),<br>
+ Range(Range) {}<br>
+<br>
+ MIToken(TokenKind Kind, StringRef Range, std::string StringValue,<br>
+ unsigned StringOffset = 0)<br>
+ : Kind(Kind), StringOffset(StringOffset), HasStringValue(true),<br>
+ Range(Range), StringValue(std::move(StringValue)) {}<br>
<br>
MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,<br>
unsigned StringOffset = 0)<br>
- : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {}<br>
+ : Kind(Kind), StringOffset(StringOffset), HasStringValue(false),<br>
+ Range(Range), IntVal(IntVal) {}<br>
<br>
TokenKind kind() const { return Kind; }<br>
<br>
@@ -124,30 +129,18 @@ public:<br>
<br>
StringRef::iterator location() const { return Range.begin(); }<br>
<br>
- bool isStringValueQuoted() const {<br>
- return Kind == QuotedNamedGlobalValue || Kind == QuotedExternalSymbol ||<br>
- Kind == QuotedNamedIRBlock || Kind == QuotedNamedIRValue;<br>
- }<br>
-<br>
/// Return the token's raw string value.<br>
///<br>
/// If the string value is quoted, this method returns that quoted string as<br>
/// it is, without unescaping the string value.<br>
StringRef rawStringValue() const { return Range.drop_front(StringOffset); }<br></blockquote><div><br></div></div></div><div>rawStringValue shouldn't be needed anymore, right?</div></div></div></div></blockquote><div><br></div><div>I use the rawStringValue for some of the error messages - so that</div><div>I can print out the full name (including quotes) in the error message.</div><div>But I think I can get rid of it if I use the full raw token in the error</div><div>message instead of just string value. I'll be able to get the raw token</div><div>from the range StringRef.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><div class="gmail_quote"><span class="HOEnZb"><font color="#888888"><div><br></div><div>-- Sean Silva</div></font></span><div><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
- /// Return token's string value.<br>
- ///<br>
- /// Expects the string value to be unquoted.<br>
+ /// Return the token's string value.<br>
StringRef stringValue() const {<br>
- assert(!isStringValueQuoted() && "String value is quoted");<br>
- return Range.drop_front(StringOffset);<br>
+ return HasStringValue ? StringRef(StringValue)<br>
+ : Range.drop_front(StringOffset);<br>
}<br>
<br>
- /// Unescapes the token's string value.<br>
- ///<br>
- /// Expects the string value to be quoted.<br>
- void unescapeQuotedStringValue(std::string &Str) const;<br>
-<br>
const APSInt &integerValue() const { return IntVal; }<br>
<br>
bool hasIntegerValue() const {<br>
<br>
Modified: llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=244068&r1=244067&r2=244068&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp?rev=244068&r1=244067&r2=244068&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/MIRParser/MIParser.cpp Wed Aug 5 12:35:55 2015<br>
@@ -37,22 +37,6 @@ using namespace llvm;<br>
<br>
namespace {<br>
<br>
-struct StringValueUtility {<br>
- StringRef String;<br>
- std::string UnescapedString;<br>
-<br>
- StringValueUtility(const MIToken &Token) {<br>
- if (Token.isStringValueQuoted()) {<br>
- Token.unescapeQuotedStringValue(UnescapedString);<br>
- String = UnescapedString;<br>
- return;<br>
- }<br>
- String = Token.stringValue();<br>
- }<br>
-<br>
- operator StringRef() const { return String; }<br>
-};<br>
-<br>
/// A wrapper struct around the 'MachineOperand' struct that includes a source<br>
/// range.<br>
struct MachineOperandWithLocation {<br>
@@ -651,11 +635,9 @@ bool MIParser::parseFixedStackObjectOper<br>
<br>
bool MIParser::parseGlobalValue(GlobalValue *&GV) {<br>
switch (Token.kind()) {<br>
- case MIToken::NamedGlobalValue:<br>
- case MIToken::QuotedNamedGlobalValue: {<br>
- StringValueUtility Name(Token);<br>
+ case MIToken::NamedGlobalValue: {<br>
const Module *M = MF.getFunction()->getParent();<br>
- GV = M->getNamedValue(Name);<br>
+ GV = M->getNamedValue(Token.stringValue());<br>
if (!GV)<br>
return error(Twine("use of undefined global value '@") +<br>
Token.rawStringValue() + "'");<br>
@@ -716,10 +698,8 @@ bool MIParser::parseJumpTableIndexOperan<br>
}<br>
<br>
bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {<br>
- assert(Token.is(MIToken::ExternalSymbol) ||<br>
- Token.is(MIToken::QuotedExternalSymbol));<br>
- StringValueUtility Name(Token);<br>
- const char *Symbol = MF.createExternalSymbolName(Name);<br>
+ assert(Token.is(MIToken::ExternalSymbol));<br>
+ const char *Symbol = MF.createExternalSymbolName(Token.stringValue());<br>
lex();<br>
// TODO: Parse the target flags.<br>
Dest = MachineOperand::CreateES(Symbol);<br>
@@ -823,10 +803,9 @@ bool MIParser::parseCFIOperand(MachineOp<br>
<br>
bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {<br>
switch (Token.kind()) {<br>
- case MIToken::NamedIRBlock:<br>
- case MIToken::QuotedNamedIRBlock: {<br>
- StringValueUtility Name(Token);<br>
- BB = dyn_cast_or_null<BasicBlock>(F.getValueSymbolTable().lookup(Name));<br>
+ case MIToken::NamedIRBlock: {<br>
+ BB = dyn_cast_or_null<BasicBlock>(<br>
+ F.getValueSymbolTable().lookup(Token.stringValue()));<br>
if (!BB)<br>
return error(Twine("use of undefined IR block '%ir-block.") +<br>
Token.rawStringValue() + "'");<br>
@@ -854,8 +833,7 @@ bool MIParser::parseBlockAddressOperand(<br>
if (expectAndConsume(MIToken::lparen))<br>
return true;<br>
if (Token.isNot(MIToken::GlobalValue) &&<br>
- Token.isNot(MIToken::NamedGlobalValue) &&<br>
- Token.isNot(MIToken::QuotedNamedGlobalValue))<br>
+ Token.isNot(MIToken::NamedGlobalValue))<br>
return error("expected a global value");<br>
GlobalValue *GV = nullptr;<br>
if (parseGlobalValue(GV))<br>
@@ -867,8 +845,7 @@ bool MIParser::parseBlockAddressOperand(<br>
if (expectAndConsume(MIToken::comma))<br>
return true;<br>
BasicBlock *BB = nullptr;<br>
- if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock) &&<br>
- Token.isNot(MIToken::QuotedNamedIRBlock))<br>
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))<br>
return error("expected an IR block reference");<br>
if (parseIRBlock(BB, *F))<br>
return true;<br>
@@ -926,14 +903,12 @@ bool MIParser::parseMachineOperand(Machi<br>
return parseFixedStackObjectOperand(Dest);<br>
case MIToken::GlobalValue:<br>
case MIToken::NamedGlobalValue:<br>
- case MIToken::QuotedNamedGlobalValue:<br>
return parseGlobalAddressOperand(Dest);<br>
case MIToken::ConstantPoolItem:<br>
return parseConstantPoolIndexOperand(Dest);<br>
case MIToken::JumpTableIndex:<br>
return parseJumpTableIndexOperand(Dest);<br>
case MIToken::ExternalSymbol:<br>
- case MIToken::QuotedExternalSymbol:<br>
return parseExternalSymbolOperand(Dest);<br>
case MIToken::exclaim:<br>
return parseMetadataOperand(Dest);<br>
@@ -964,10 +939,8 @@ bool MIParser::parseMachineOperand(Machi<br>
<br>
bool MIParser::parseIRValue(Value *&V) {<br>
switch (Token.kind()) {<br>
- case MIToken::NamedIRValue:<br>
- case MIToken::QuotedNamedIRValue: {<br>
- StringValueUtility Name(Token);<br>
- V = MF.getFunction()->getValueSymbolTable().lookup(Name);<br>
+ case MIToken::NamedIRValue: {<br>
+ V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());<br>
if (!V)<br>
return error(Twine("use of undefined IR value '%ir.") +<br>
Token.rawStringValue() + "'");<br>
@@ -1032,8 +1005,7 @@ bool MIParser::parseMachineMemoryOperand<br>
lex();<br>
<br>
// TODO: Parse pseudo source values.<br>
- if (Token.isNot(MIToken::NamedIRValue) &&<br>
- Token.isNot(MIToken::QuotedNamedIRValue))<br>
+ if (Token.isNot(MIToken::NamedIRValue))<br>
return error("expected an IR value reference");<br>
Value *V = nullptr;<br>
if (parseIRValue(V))<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div></div>