[PATCH] D14480

Yunlian Jiang via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 10 10:49:32 PST 2015


http://reviews.llvm.org/D14480

This tries to improve the compilation time of the test case in
https://llvm.org/bugs/show_bug.cgi?id=25416

The problem is that, when getting the SrcLocInfo for an asm statement,
the code looks up the SrcLoc separately for each line inside the asm
string. For each line, it goes through the token table once. This is
not optimal: we can find the SrcLoc for all the lines of the asm with a
single scan of the token table. To do this, we need to store the
position of the current token and the total size of the tokens that
have already been processed.

Index: include/clang/AST/Expr.h
===================================================================
--- include/clang/AST/Expr.h
+++ include/clang/AST/Expr.h
@@ -1611,7 +1611,10 @@
   ///
   SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
                                    const LangOptions &Features,
-                                   const TargetInfo &Target) const;
+                                   const TargetInfo &Target,
+                                   unsigned * StartToken=nullptr,
+                                   unsigned * ByteOffset=nullptr
+                                   ) const;

   typedef const SourceLocation *tokloc_iterator;
   tokloc_iterator tokloc_begin() const { return TokLocs; }
Index: lib/AST/Expr.cpp
===================================================================
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -1009,13 +1009,21 @@
 ///
 SourceLocation StringLiteral::
 getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
-                  const LangOptions &Features, const TargetInfo &Target) const {
+                  const LangOptions &Features, const TargetInfo &Target,
+                  unsigned * StartToken, unsigned * ByteOffset) const {
   assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
          "Only narrow string literals are currently supported");

   // Loop over all of the tokens in this string until we find the one that
   // contains the byte we're looking for.
   unsigned TokNo = 0;
+  unsigned StringOffset = 0;
+  if (StartToken)
+    TokNo = *StartToken;
+  if (ByteOffset){
+    StringOffset = *ByteOffset;
+    ByteNo -= StringOffset;
+  }
   while (1) {
     assert(TokNo < getNumConcatenated() && "Invalid byte number!");
     SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
@@ -1029,8 +1037,13 @@
     std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc);
     bool Invalid = false;
     StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
-    if (Invalid)
+    if (Invalid) {
+      if (ByteOffset != nullptr)
+        *ByteOffset = StringOffset;
+      if (StartToken != nullptr)
+        *StartToken = TokNo;
       return StrTokSpellingLoc;
+    }

     const char *StrData = Buffer.data()+LocInfo.second;

@@ -1051,10 +1064,15 @@

       // Now that we know the offset of the token in the spelling, use the
       // preprocessor to get the offset in the original source.
+      if (ByteOffset != nullptr)
+        *ByteOffset = StringOffset;
+      if (StartToken != nullptr)
+        *StartToken = TokNo;
       return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
     }

     // Move to the next string token.
+    StringOffset += TokNumBytes;
     ++TokNo;
     ByteNo -= TokNumBytes;
   }
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -1707,13 +1707,17 @@
   if (!StrVal.empty()) {
     const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
     const LangOptions &LangOpts = CGF.CGM.getLangOpts();
+    unsigned StartToken = 0;
+    unsigned ByteOffset = 0;

     // Add the location of the start of each subsequent line of the asm to the
     // MDNode.
     for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) {
       if (StrVal[i] != '\n') continue;
       SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts,
-                                                      CGF.getTarget());
+                                                      CGF.getTarget(),
+                                                      &StartToken,
+                                                      &ByteOffset);
       Locs.push_back(llvm::ConstantAsMetadata::get(
           llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())));
     }


More information about the cfe-commits mailing list