r255198 - PR25416: Improve performance of processing inline assembly consisting of many
Richard Smith via cfe-commits
cfe-commits at lists.llvm.org
Wed Dec 9 17:11:48 PST 2015
Author: rsmith
Date: Wed Dec 9 19:11:47 2015
New Revision: 255198
URL: http://llvm.org/viewvc/llvm-project?rev=255198&view=rev
Log:
PR25416: Improve performance of processing inline assembly consisting of many
implicitly-concatenated string literals. When looking for the start of a token
in the inline assembly, start from the end of the previous token, not the start
of the entire string.
Patch by Yunlian Jiang!
Modified:
cfe/trunk/include/clang/AST/Expr.h
cfe/trunk/lib/AST/Expr.cpp
cfe/trunk/lib/CodeGen/CGStmt.cpp
Modified: cfe/trunk/include/clang/AST/Expr.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Expr.h?rev=255198&r1=255197&r2=255198&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/Expr.h (original)
+++ cfe/trunk/include/clang/AST/Expr.h Wed Dec 9 19:11:47 2015
@@ -1631,13 +1631,15 @@ public:
/// and can have escape sequences in them in addition to the usual trigraph
/// and escaped newline business. This routine handles this complexity.
///
- SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
- const LangOptions &Features,
- const TargetInfo &Target) const;
+ SourceLocation
+ getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+ const LangOptions &Features, const TargetInfo &Target,
+ unsigned *StartToken = nullptr,
+ unsigned *StartTokenByteOffset = nullptr) const;
typedef const SourceLocation *tokloc_iterator;
tokloc_iterator tokloc_begin() const { return TokLocs; }
- tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
+ tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; }
SourceLocation getLocStart() const LLVM_READONLY { return TokLocs[0]; }
SourceLocation getLocEnd() const LLVM_READONLY {
Modified: cfe/trunk/lib/AST/Expr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=255198&r1=255197&r2=255198&view=diff
==============================================================================
--- cfe/trunk/lib/AST/Expr.cpp (original)
+++ cfe/trunk/lib/AST/Expr.cpp Wed Dec 9 19:11:47 2015
@@ -1007,15 +1007,33 @@ void StringLiteral::setString(const ASTC
/// can have escape sequences in them in addition to the usual trigraph and
/// escaped newline business. This routine handles this complexity.
///
-SourceLocation StringLiteral::
-getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
- const LangOptions &Features, const TargetInfo &Target) const {
+/// The *StartToken sets the first token to be searched in this function and
+/// the *StartTokenByteOffset is the byte offset of the first token. Before
+/// returning, it updates the *StartToken to the TokNo of the token being found
+/// and sets *StartTokenByteOffset to the byte offset of the token in the
+/// string.
+/// Using these two parameters can reduce the time complexity from O(n^2) to
+/// O(n) if one wants to get the location of byte for all the tokens in a
+/// string.
+///
+SourceLocation
+StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+ const LangOptions &Features,
+ const TargetInfo &Target, unsigned *StartToken,
+ unsigned *StartTokenByteOffset) const {
assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
"Only narrow string literals are currently supported");
// Loop over all of the tokens in this string until we find the one that
// contains the byte we're looking for.
unsigned TokNo = 0;
+ unsigned StringOffset = 0;
+ if (StartToken)
+ TokNo = *StartToken;
+ if (StartTokenByteOffset) {
+ StringOffset = *StartTokenByteOffset;
+ ByteNo -= StringOffset;
+ }
while (1) {
assert(TokNo < getNumConcatenated() && "Invalid byte number!");
SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
@@ -1024,14 +1042,20 @@ getLocationOfByte(unsigned ByteNo, const
// the string literal, not the identifier for the macro it is potentially
// expanded through.
SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc);
-
+
// Re-lex the token to get its length and original spelling.
- std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc);
+ std::pair<FileID, unsigned> LocInfo =
+ SM.getDecomposedLoc(StrTokSpellingLoc);
bool Invalid = false;
StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
- if (Invalid)
+ if (Invalid) {
+ if (StartTokenByteOffset != nullptr)
+ *StartTokenByteOffset = StringOffset;
+ if (StartToken != nullptr)
+ *StartToken = TokNo;
return StrTokSpellingLoc;
-
+ }
+
const char *StrData = Buffer.data()+LocInfo.second;
// Create a lexer starting at the beginning of this token.
@@ -1047,14 +1071,19 @@ getLocationOfByte(unsigned ByteNo, const
// If the byte is in this token, return the location of the byte.
if (ByteNo < TokNumBytes ||
(ByteNo == TokNumBytes && TokNo == getNumConcatenated() - 1)) {
- unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo);
-
+ unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo);
+
// Now that we know the offset of the token in the spelling, use the
// preprocessor to get the offset in the original source.
+ if (StartTokenByteOffset != nullptr)
+ *StartTokenByteOffset = StringOffset;
+ if (StartToken != nullptr)
+ *StartToken = TokNo;
return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
}
-
+
// Move to the next string token.
+ StringOffset += TokNumBytes;
++TokNo;
ByteNo -= TokNumBytes;
}
Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=255198&r1=255197&r2=255198&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmt.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmt.cpp Wed Dec 9 19:11:47 2015
@@ -1714,13 +1714,15 @@ static llvm::MDNode *getAsmSrcLocInfo(co
if (!StrVal.empty()) {
const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
const LangOptions &LangOpts = CGF.CGM.getLangOpts();
+ unsigned StartToken = 0;
+ unsigned ByteOffset = 0;
// Add the location of the start of each subsequent line of the asm to the
// MDNode.
- for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) {
+ for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) {
if (StrVal[i] != '\n') continue;
- SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts,
- CGF.getTarget());
+ SourceLocation LineLoc = Str->getLocationOfByte(
+ i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset);
Locs.push_back(llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())));
}
More information about the cfe-commits
mailing list