[clang] b4c83a1 - [Tooling/DependencyScanning & Preprocessor] Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources

Argyrios Kyrtzidis via cfe-commits cfe-commits at lists.llvm.org
Thu May 26 13:01:22 PDT 2022


Author: Argyrios Kyrtzidis
Date: 2022-05-26T12:50:06-07:00
New Revision: b4c83a13f664582015ea22924b9a0c6290d41f5b

URL: https://github.com/llvm/llvm-project/commit/b4c83a13f664582015ea22924b9a0c6290d41f5b
DIFF: https://github.com/llvm/llvm-project/commit/b4c83a13f664582015ea22924b9a0c6290d41f5b.diff

LOG: [Tooling/DependencyScanning & Preprocessor] Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources

This is a commit with the following changes:

* Remove `ExcludedPreprocessorDirectiveSkipMapping` and related functionality

Removes `ExcludedPreprocessorDirectiveSkipMapping`; its intended benefit of fast skipping of excluded directive blocks
will be superseded by a follow-up patch in the series that will use dependency scanning lexing for the same purpose.

* Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources

Replaces the "source minimization" mechanism with a mechanism that produces lexed dependency directives tokens.

* Make the special lexing for dependency scanning a first-class feature of the `Preprocessor` and `Lexer`

This brings the following benefits:

    * Full access to the preprocessor state during dependency scanning. E.g. a component can see what includes were taken and where they were located in the actual sources.
    * Improved performance for dependency scanning. Measurements with a release+thin-LTO build show a ~11% reduction in wall time.
    * Opportunity to use dependency scanning lexing to speed up skipping of excluded conditional blocks during normal preprocessing (as a follow-up, not part of this patch).

For normal preprocessing, measurements show that the differences are below the noise level.

Since, after this change, we no longer minimize sources and pass the minimized versions in place of the real sources, `DependencyScanningFilesystem` is not technically necessary, but it has valuable performance benefits for caching file `stat`s along with the results of scanning the sources. So the setup of using the `DependencyScanningFilesystem` during a dependency scan remains.

Differential Revision: https://reviews.llvm.org/D125486
Differential Revision: https://reviews.llvm.org/D125487
Differential Revision: https://reviews.llvm.org/D125488

Added: 
    

Modified: 
    clang/include/clang/Lex/DependencyDirectivesScanner.h
    clang/include/clang/Lex/Lexer.h
    clang/include/clang/Lex/Preprocessor.h
    clang/include/clang/Lex/PreprocessorOptions.h
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
    clang/lib/Frontend/FrontendActions.cpp
    clang/lib/Lex/DependencyDirectivesScanner.cpp
    clang/lib/Lex/Lexer.cpp
    clang/lib/Lex/PPDirectives.cpp
    clang/lib/Lex/PPLexerChange.cpp
    clang/lib/Lex/Preprocessor.cpp
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
    clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
    clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
    clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
    clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
    clang/unittests/Tooling/DependencyScannerTest.cpp

Removed: 
    clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h


################################################################################
diff  --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index b65891c6b8aba..1ea7e79a0d682 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -19,15 +19,41 @@
 
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace clang {
 
+namespace tok {
+enum TokenKind : unsigned short;
+}
+
 class DiagnosticsEngine;
 
 namespace dependency_directives_scan {
 
+/// Token lexed as part of dependency directive scanning.
+struct Token {
+  /// Offset into the original source input.
+  unsigned Offset;
+  unsigned Length;
+  tok::TokenKind Kind;
+  unsigned short Flags;
+
+  Token(unsigned Offset, unsigned Length, tok::TokenKind Kind,
+        unsigned short Flags)
+      : Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {}
+
+  unsigned getEnd() const { return Offset + Length; }
+
+  bool is(tok::TokenKind K) const { return Kind == K; }
+  bool isNot(tok::TokenKind K) const { return Kind != K; }
+  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
+    return is(K1) || is(K2);
+  }
+  template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
+    return is(K1) || isOneOf(Ks...);
+  }
+};
+
 /// Represents the kind of preprocessor directive or a module declaration that
 /// is tracked by the scanner in its token output.
 enum DirectiveKind : uint8_t {
@@ -52,9 +78,10 @@ enum DirectiveKind : uint8_t {
   pp_else,
   pp_endif,
   decl_at_import,
-  cxx_export_decl,
   cxx_module_decl,
   cxx_import_decl,
+  cxx_export_module_decl,
+  cxx_export_import_decl,
   pp_eof,
 };
 
@@ -62,53 +89,48 @@ enum DirectiveKind : uint8_t {
 /// scanning. It's used to track various preprocessor directives that could
 /// potentially have an effect on the depedencies.
 struct Directive {
+  ArrayRef<Token> Tokens;
+
   /// The kind of token.
   DirectiveKind Kind = pp_none;
 
-  /// Offset into the output byte stream of where the directive begins.
-  int Offset = -1;
-
-  Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {}
-};
-
-/// Simplified token range to track the range of a potentially skippable PP
-/// directive.
-struct SkippedRange {
-  /// Offset into the output byte stream of where the skipped directive begins.
-  int Offset;
-
-  /// The number of bytes that can be skipped before the preprocessing must
-  /// resume.
-  int Length;
+  Directive() = default;
+  Directive(DirectiveKind K, ArrayRef<Token> Tokens)
+      : Tokens(Tokens), Kind(K) {}
 };
 
-/// Computes the potential source ranges that can be skipped by the preprocessor
-/// when skipping a directive like #if, #ifdef or #elsif.
-///
-/// \returns false on success, true on error.
-bool computeSkippedRanges(ArrayRef<Directive> Input,
-                          llvm::SmallVectorImpl<SkippedRange> &Range);
-
 } // end namespace dependency_directives_scan
 
-/// Minimize the input down to the preprocessor directives that might have
+/// Scan the input for the preprocessor directives that might have
 /// an effect on the dependencies for a compilation unit.
 ///
-/// This function deletes all non-preprocessor code, and strips anything that
-/// can't affect what gets included. It canonicalizes whitespace where
-/// convenient to stabilize the output against formatting changes in the input.
-///
-/// Clears the output vectors at the beginning of the call.
+/// This function ignores all non-preprocessor code and anything that
+/// can't affect what gets included.
 ///
 /// \returns false on success, true on error. If the diagnostic engine is not
 /// null, an appropriate error is reported using the given input location
-/// with the offset that corresponds to the minimizer's current buffer offset.
+/// with the offset that corresponds to the \p Input buffer offset.
 bool scanSourceForDependencyDirectives(
-    llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output,
-    llvm::SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
+    StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+    SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
     DiagnosticsEngine *Diags = nullptr,
     SourceLocation InputSourceLoc = SourceLocation());
 
+/// Print the previously scanned dependency directives as minimized source text.
+///
+/// \param Source The original source text that the dependency directives were
+/// scanned from.
+/// \param Directives The previously scanned dependency
+/// directives.
+/// \param OS the stream to print the dependency directives on.
+///
+/// This is used primarily for testing purposes, during dependency scanning the
+/// \p Lexer uses the tokens directly, not their printed version.
+void printDependencyDirectivesAsSource(
+    StringRef Source,
+    ArrayRef<dependency_directives_scan::Directive> Directives,
+    llvm::raw_ostream &OS);
+
 } // end namespace clang
 
 #endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H

diff  --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 76612184bdffa..d04b332934655 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -16,6 +16,7 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "clang/Lex/PreprocessorLexer.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/Optional.h"
@@ -149,6 +150,13 @@ class Lexer : public PreprocessorLexer {
   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
   ConflictMarkerKind CurrentConflictMarkerState;
 
+  /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer().
+  ArrayRef<dependency_directives_scan::Directive> DepDirectives;
+
+  /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the
+  /// next token to use from the current dependency directive.
+  unsigned NextDepDirectiveTokenIndex = 0;
+
   void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
 
 public:
@@ -195,6 +203,23 @@ class Lexer : public PreprocessorLexer {
   /// return the tok::eof token.  This implicitly involves the preprocessor.
   bool Lex(Token &Result);
 
+  /// Called when the preprocessor is in 'dependency scanning lexing mode'.
+  bool LexDependencyDirectiveToken(Token &Result);
+
+  /// Called when the preprocessor is in 'dependency scanning lexing mode' and
+  /// is skipping a conditional block.
+  bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
+
+  /// True when the preprocessor is in 'dependency scanning lexing mode' and
+  /// created this \p Lexer for lexing a set of dependency directive tokens.
+  bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
+
+  /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to
+  /// the position just after the token.
+  /// \returns the buffer pointer at the beginning of the token.
+  const char *convertDependencyDirectiveToken(
+      const dependency_directives_scan::Token &DDTok, Token &Result);
+
 public:
   /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
   bool isPragmaLexer() const { return Is_PragmaLexer; }
@@ -288,14 +313,8 @@ class Lexer : public PreprocessorLexer {
     return BufferPtr - BufferStart;
   }
 
-  /// Skip over \p NumBytes bytes.
-  ///
-  /// If the skip is successful, the next token will be lexed from the new
-  /// offset. The lexer also assumes that we skipped to the start of the line.
-  ///
-  /// \returns true if the skip failed (new offset would have been past the
-  /// end of the buffer), false otherwise.
-  bool skipOver(unsigned NumBytes);
+  /// Set the lexer's buffer pointer to \p Offset.
+  void seek(unsigned Offset, bool IsAtStartOfLine);
 
   /// Stringify - Convert the specified string into a C string by i) escaping
   /// '\\' and " characters and ii) replacing newline character(s) with "\\n".

diff  --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index d394ad33c2372..6d8f03e3ceb1b 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -29,7 +29,6 @@
 #include "clang/Lex/ModuleLoader.h"
 #include "clang/Lex/ModuleMap.h"
 #include "clang/Lex/PPCallbacks.h"
-#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -558,6 +557,7 @@ class Preprocessor {
     CLK_Lexer,
     CLK_TokenLexer,
     CLK_CachingLexer,
+    CLK_DependencyDirectivesLexer,
     CLK_LexAfterModuleImport
   } CurLexerKind = CLK_Lexer;
 
@@ -2595,14 +2595,6 @@ class Preprocessor {
   void emitMacroDeprecationWarning(const Token &Identifier) const;
   void emitRestrictExpansionWarning(const Token &Identifier) const;
   void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
-
-  Optional<unsigned>
-  getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
-
-  /// Contains the currently active skipped range mappings for skipping excluded
-  /// conditional directives.
-  ExcludedPreprocessorDirectiveSkipMapping
-      *ExcludedConditionalDirectiveSkipMappings;
 };
 
 /// Abstract base class that describes a handler that will receive

diff  --git a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h
deleted file mode 100644
index 49687cb5cc852..0000000000000
--- a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===- PreprocessorExcludedConditionalDirectiveSkipMapping.h - --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H
-#define LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H
-
-#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-namespace clang {
-
-/// A mapping from an offset into a buffer to the number of bytes that can be
-/// skipped by the preprocessor when skipping over excluded conditional
-/// directive ranges.
-using PreprocessorSkippedRangeMapping = llvm::DenseMap<unsigned, unsigned>;
-
-/// The datastructure that holds the mapping between the active memory buffers
-/// and the individual skip mappings.
-using ExcludedPreprocessorDirectiveSkipMapping =
-    llvm::DenseMap<const char *, const PreprocessorSkippedRangeMapping *>;
-
-} // end namespace clang
-
-#endif // LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H

diff  --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index dc5382ddc7432..4cf18e98f051f 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -10,8 +10,9 @@
 #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_
 
 #include "clang/Basic/BitmaskEnum.h"
+#include "clang/Basic/FileEntry.h"
 #include "clang/Basic/LLVM.h"
-#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include <functional>
@@ -200,13 +201,18 @@ class PreprocessorOptions {
   /// build it again.
   std::shared_ptr<FailedModulesSet> FailedModules;
 
-  /// Contains the currently active skipped range mappings for skipping excluded
-  /// conditional directives.
+  /// Function for getting the dependency preprocessor directives of a file.
   ///
-  /// The pointer is passed to the Preprocessor when it's constructed. The
-  /// pointer is unowned, the client is responsible for its lifetime.
-  ExcludedPreprocessorDirectiveSkipMapping
-      *ExcludedConditionalDirectiveSkipMappings = nullptr;
+  /// These are directives derived from a special form of lexing where the
+  /// source input is scanned for the preprocessor directives that might have an
+  /// effect on the dependencies for a compilation unit.
+  ///
+  /// Enables a client to cache the directives for a file and provide them
+  /// across multiple compiler invocations.
+  /// FIXME: Allow returning an error.
+  std::function<Optional<ArrayRef<dependency_directives_scan::Directive>>(
+      FileEntryRef)>
+      DependencyDirectivesForFile;
 
   /// Set up preprocessor for RunAnalysis action.
   bool SetUpStaticAnalyzer = false;

diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index c5f239a2e4bff..e0d16df92e1a4 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -10,7 +10,7 @@
 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
 
 #include "clang/Basic/LLVM.h"
-#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/Allocator.h"
@@ -22,24 +22,26 @@ namespace clang {
 namespace tooling {
 namespace dependencies {
 
-/// Original and minimized contents of a cached file entry. Single instance can
+using DependencyDirectivesTy =
+    SmallVector<dependency_directives_scan::Directive, 20>;
+
+/// Contents and directive tokens of a cached file entry. Single instance can
 /// be shared between multiple entries.
 struct CachedFileContents {
-  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Original)
-      : Original(std::move(Original)), MinimizedAccess(nullptr) {}
+  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
+      : Original(std::move(Contents)), DepDirectives(nullptr) {}
 
   /// Owning storage for the original contents.
   std::unique_ptr<llvm::MemoryBuffer> Original;
 
   /// The mutex that must be locked before mutating directive tokens.
   std::mutex ValueLock;
-  /// Owning storage for the minimized contents.
-  std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage;
+  SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
   /// Accessor to the directive tokens that's atomic to avoid data races.
-  std::atomic<llvm::MemoryBuffer *> MinimizedAccess;
-  /// Skipped range mapping of the minimized contents.
-  /// This is initialized iff `MinimizedAccess != nullptr`.
-  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+  /// \p CachedFileContents has ownership of the pointer.
+  std::atomic<const Optional<DependencyDirectivesTy> *> DepDirectives;
+
+  ~CachedFileContents() { delete DepDirectives.load(); }
 };
 
 /// An in-memory representation of a file system entity that is of interest to
@@ -86,13 +88,17 @@ class CachedFileSystemEntry {
 
   /// \returns The scanned preprocessor directive tokens of the file that are
   /// used to speed up preprocessing, if available.
-  StringRef getDirectiveTokens() const {
+  Optional<ArrayRef<dependency_directives_scan::Directive>>
+  getDirectiveTokens() const {
     assert(!isError() && "error");
-    assert(!MaybeStat->isDirectory() && "not a file");
+    assert(!isDirectory() && "not a file");
     assert(Contents && "contents not initialized");
-    llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load();
-    assert(Buffer && "not minimized");
-    return Buffer->getBuffer();
+    if (auto *Directives = Contents->DepDirectives.load()) {
+      if (Directives->hasValue())
+        return ArrayRef<dependency_directives_scan::Directive>(
+            Directives->getValue());
+    }
+    return None;
   }
 
   /// \returns The error.
@@ -111,15 +117,6 @@ class CachedFileSystemEntry {
     return MaybeStat->getUniqueID();
   }
 
-  /// \returns The mapping between location -> distance that is used to speed up
-  /// the block skipping in the preprocessor.
-  const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const {
-    assert(!isError() && "error");
-    assert(!isDirectory() && "not a file");
-    assert(Contents && "contents not initialized");
-    return Contents->PPSkippedRangeMapping;
-  }
-
   /// \returns The data structure holding both contents and directive tokens.
   CachedFileContents *getCachedContents() const {
     assert(!isError() && "error");
@@ -237,10 +234,6 @@ class DependencyScanningFilesystemLocalCache {
 /// If the underlying entry is an opened file, this wrapper returns the file
 /// contents and the scanned preprocessor directives.
 class EntryRef {
-  /// For entry that is an opened file, this bit signifies whether its contents
-  /// are minimized.
-  bool Minimized;
-
   /// The filename used to access this entry.
   std::string Filename;
 
@@ -248,8 +241,8 @@ class EntryRef {
   const CachedFileSystemEntry &Entry;
 
 public:
-  EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry)
-      : Minimized(Minimized), Filename(Name), Entry(Entry) {}
+  EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
+      : Filename(Name), Entry(Entry) {}
 
   llvm::vfs::Status getStatus() const {
     llvm::vfs::Status Stat = Entry.getStatus();
@@ -268,12 +261,11 @@ class EntryRef {
     return *this;
   }
 
-  StringRef getContents() const {
-    return Minimized ? Entry.getDirectiveTokens() : Entry.getOriginalContents();
-  }
+  StringRef getContents() const { return Entry.getOriginalContents(); }
 
-  const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const {
-    return Minimized ? &Entry.getPPSkippedRangeMapping() : nullptr;
+  Optional<ArrayRef<dependency_directives_scan::Directive>>
+  getDirectiveTokens() const {
+    return Entry.getDirectiveTokens();
   }
 };
 
@@ -290,24 +282,13 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
 public:
   DependencyScanningWorkerFilesystem(
       DependencyScanningFilesystemSharedCache &SharedCache,
-      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
-      ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings)
-      : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache),
-        PPSkipMappings(PPSkipMappings) {}
+      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
+      : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {}
 
   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
   openFileForRead(const Twine &Path) override;
 
-  /// Disable directives scanning of the given file.
-  void disableDirectivesScanning(StringRef Filename);
-  /// Enable directives scanning of all files.
-  void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); }
-
-private:
-  /// Check whether the file should be scanned for preprocessor directives.
-  bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID);
-
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
@@ -316,6 +297,10 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   getOrCreateFileSystemEntry(StringRef Filename,
                              bool DisableDirectivesScanning = false);
 
+private:
+  /// Check whether the file should be scanned for preprocessor directives.
+  bool shouldScanForDirectives(StringRef Filename);
+
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
   /// shared cache indexed by unique ID, or creates new entry from scratch.
@@ -396,12 +381,6 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
   DependencyScanningFilesystemLocalCache LocalCache;
-  /// The mapping structure which records information about the
-  /// excluded conditional directive skip mappings that are used by the
-  /// currently active preprocessor.
-  ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings;
-  /// The set of files that should not be scanned for PP directives.
-  llvm::DenseSet<llvm::sys::fs::UniqueID> NotToBeScanned;
 };
 
 } // end namespace dependencies

diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index 84c74ac66359d..337bba2e72da9 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -13,7 +13,6 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Frontend/PCHContainerOperations.h"
-#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
 #include "llvm/Support/Error.h"
@@ -69,7 +68,6 @@ class DependencyScanningWorker {
 
 private:
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
-  ExcludedPreprocessorDirectiveSkipMapping PPSkipMappings;
 
   /// The physical filesystem overlaid by `InMemoryFS`.
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> RealFS;

diff  --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index ba492c29d9c02..f61c83a2a465e 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -1157,10 +1157,10 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
   SourceManager &SM = CI.getPreprocessor().getSourceManager();
   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID());
 
-  llvm::SmallString<1024> Output;
+  llvm::SmallVector<dependency_directives_scan::Token, 16> Tokens;
   llvm::SmallVector<dependency_directives_scan::Directive, 32> Directives;
   if (scanSourceForDependencyDirectives(
-          FromFile.getBuffer(), Output, Directives, &CI.getDiagnostics(),
+          FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(),
           SM.getLocForStartOfFile(SM.getMainFileID()))) {
     assert(CI.getDiagnostics().hasErrorOccurred() &&
            "no errors reported for failure");
@@ -1179,7 +1179,8 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
     }
     return;
   }
-  llvm::outs() << Output;
+  printDependencyDirectivesAsSource(FromFile.getBuffer(), Directives,
+                                    llvm::outs());
 }
 
 void GetDependenciesByModuleNameAction::ExecuteAction() {

diff  --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index dd1ea1cfe11e0..4c0105269eebd 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -18,92 +18,148 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/MemoryBuffer.h"
 
-using namespace llvm;
 using namespace clang;
 using namespace clang::dependency_directives_scan;
+using namespace llvm;
 
 namespace {
 
-struct Scanner {
-  /// Minimized output.
-  SmallVectorImpl<char> &Out;
-  /// The known tokens encountered during the minimization.
-  SmallVectorImpl<Directive> &Directives;
+struct DirectiveWithTokens {
+  DirectiveKind Kind;
+  unsigned NumTokens;
 
-  Scanner(SmallVectorImpl<char> &Out, SmallVectorImpl<Directive> &Directives,
-          StringRef Input, DiagnosticsEngine *Diags,
-          SourceLocation InputSourceLoc)
-      : Out(Out), Directives(Directives), Input(Input), Diags(Diags),
-        InputSourceLoc(InputSourceLoc) {}
+  DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
+      : Kind(Kind), NumTokens(NumTokens) {}
+};
+
+/// Does an efficient "scan" of the sources to detect the presence of
+/// preprocessor (or module import) directives and collects the raw lexed tokens
+/// for those directives so that the \p Lexer can "replay" them when the file is
+/// included.
+///
+/// Note that the behavior of the raw lexer is affected by the language mode,
+/// while at this point we want to do a scan and collect tokens once,
+/// irrespective of the language mode that the file will get included in. To
+/// compensate for that the \p Lexer, while "replaying", will adjust a token
+/// where appropriate, when it could affect the preprocessor's state.
+/// For example in a directive like
+///
+/// \code
+///   #if __has_cpp_attribute(clang::fallthrough)
+/// \endcode
+///
+/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
+/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
+/// while in C++ mode.
+struct Scanner {
+  Scanner(StringRef Input,
+          SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+          DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
+      : Input(Input), Tokens(Tokens), Diags(Diags),
+        InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
+        TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
+                 Input.end()) {}
+
+  static LangOptions getLangOptsForDepScanning() {
+    LangOptions LangOpts;
+    // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
+    LangOpts.ObjC = true;
+    LangOpts.LineComment = true;
+    return LangOpts;
+  }
 
   /// Lex the provided source and emit the directive tokens.
   ///
   /// \returns True on error.
-  bool scan();
+  bool scan(SmallVectorImpl<Directive> &Directives);
 
 private:
-  struct IdInfo {
-    const char *Last;
-    StringRef Name;
-  };
+  /// Lexes next token and advances \p First and the \p Lexer.
+  LLVM_NODISCARD dependency_directives_scan::Token &
+  lexToken(const char *&First, const char *const End);
+
+  dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
+                                                        const char *const End);
 
-  /// Lex an identifier.
+  /// Lexes next token and if it is identifier returns its string, otherwise
+  /// it skips the current line and returns \p None.
   ///
-  /// \pre First points at a valid identifier head.
-  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
-  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
-                                       const char *const End);
+  /// In any case (whatever the token kind) \p First and the \p Lexer will
+  /// advance beyond the token.
+  LLVM_NODISCARD Optional<StringRef>
+  tryLexIdentifierOrSkipLine(const char *&First, const char *const End);
+
+  /// Used when it is certain that next token is an identifier.
+  LLVM_NODISCARD StringRef lexIdentifier(const char *&First,
+                                         const char *const End);
+
+  /// Lexes next token and returns true iff it is an identifier that matches \p
+  /// Id, otherwise it skips the current line and returns false.
+  ///
+  /// In any case (whatever the token kind) \p First and the \p Lexer will
+  /// advance beyond the token.
+  LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id,
+                                                 const char *&First,
+                                                 const char *const End);
+
   LLVM_NODISCARD bool scanImpl(const char *First, const char *const End);
   LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
   LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
   LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
-  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First,
+                                const char *const End);
   LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
   LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
-  LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, StringRef Directive,
-                                 const char *&First, const char *const End);
-  Directive &pushDirective(DirectiveKind K) {
-    Directives.emplace_back(K, Out.size());
-    return Directives.back();
+  LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First,
+                                 const char *const End);
+  LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind,
+                                             const char *&First,
+                                             const char *const End);
+  void lexPPDirectiveBody(const char *&First, const char *const End);
+
+  DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
+    Tokens.append(CurDirToks);
+    DirsWithToks.emplace_back(Kind, CurDirToks.size());
+    CurDirToks.clear();
+    return DirsWithToks.back();
   }
   void popDirective() {
-    Out.resize(Directives.back().Offset);
-    Directives.pop_back();
+    Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
   }
   DirectiveKind topDirective() const {
-    return Directives.empty() ? pp_none : Directives.back().Kind;
+    return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
   }
 
-  Scanner &put(char Byte) {
-    Out.push_back(Byte);
-    return *this;
-  }
-  Scanner &append(StringRef S) { return append(S.begin(), S.end()); }
-  Scanner &append(const char *First, const char *Last) {
-    Out.append(First, Last);
-    return *this;
+  unsigned getOffsetAt(const char *CurPtr) const {
+    return CurPtr - Input.data();
   }
 
-  void printToNewline(const char *&First, const char *const End);
-  void printAdjacentModuleNameParts(const char *&First, const char *const End);
-  LLVM_NODISCARD bool printAtImportBody(const char *&First,
-                                        const char *const End);
-  void printDirectiveBody(const char *&First, const char *const End);
-  void printAdjacentMacroArgs(const char *&First, const char *const End);
-  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
-
   /// Reports a diagnostic if the diagnostic engine is provided. Always returns
   /// true at the end.
   bool reportError(const char *CurPtr, unsigned Err);
 
   StringMap<char> SplitIds;
   StringRef Input;
+  SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
   DiagnosticsEngine *Diags;
   SourceLocation InputSourceLoc;
+
+  /// Keeps track of the tokens for the currently lexed directive. Once a
+  /// directive is fully lexed and "committed" then the tokens get appended to
+  /// \p Tokens and \p CurDirToks is cleared for the next directive.
+  SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
+  /// The directives that were lexed along with the number of tokens that each
+  /// directive contains. The tokens of all the directives are kept in \p Tokens
+  /// vector, in the same order as the directives order in \p DirsWithToks.
+  SmallVector<DirectiveWithTokens, 64> DirsWithToks;
+  LangOptions LangOpts;
+  Lexer TheLexer;
 };
 
 } // end anonymous namespace
@@ -112,7 +168,7 @@ bool Scanner::reportError(const char *CurPtr, unsigned Err) {
   if (!Diags)
     return true;
   assert(CurPtr >= Input.data() && "invalid buffer ptr");
-  Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
+  Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err);
   return true;
 }
 
@@ -265,30 +321,6 @@ static void skipToNewlineRaw(const char *&First, const char *const End) {
   }
 }
 
-static const char *findLastNonSpace(const char *First, const char *Last) {
-  assert(First <= Last);
-  while (First != Last && isHorizontalWhitespace(Last[-1]))
-    --Last;
-  return Last;
-}
-
-static const char *findLastNonSpaceNonBackslash(const char *First,
-                                                const char *Last) {
-  assert(First <= Last);
-  while (First != Last &&
-         (isHorizontalWhitespace(Last[-1]) || Last[-1] == '\\'))
-    --Last;
-  return Last;
-}
-
-static const char *findFirstTrailingSpace(const char *First, const char *Last) {
-  const char *LastNonSpace = findLastNonSpace(First, Last);
-  if (Last == LastNonSpace)
-    return Last;
-  assert(isHorizontalWhitespace(LastNonSpace[0]));
-  return LastNonSpace + 1;
-}
-
 static void skipLineComment(const char *&First, const char *const End) {
   assert(First[0] == '/' && First[1] == '/');
   First += 2;
@@ -396,67 +428,6 @@ static void skipDirective(StringRef Name, const char *&First,
     skipLine(First, End);
 }
 
-void Scanner::printToNewline(const char *&First, const char *const End) {
-  while (First != End && !isVerticalWhitespace(*First)) {
-    const char *Last = First;
-    do {
-      // Iterate over strings correctly to avoid comments and newlines.
-      if (*Last == '"' || *Last == '\'' ||
-          (*Last == '<' &&
-           (topDirective() == pp_include || topDirective() == pp_import))) {
-        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
-          skipRawString(Last, End);
-        else
-          skipString(Last, End);
-        continue;
-      }
-      if (*Last != '/' || End - Last < 2) {
-        ++Last;
-        continue; // Gather the rest up to print verbatim.
-      }
-
-      if (Last[1] != '/' && Last[1] != '*') {
-        ++Last;
-        continue;
-      }
-
-      // Deal with "//..." and "/*...*/".
-      append(First, findFirstTrailingSpace(First, Last));
-      First = Last;
-
-      if (Last[1] == '/') {
-        skipLineComment(First, End);
-        return;
-      }
-
-      put(' ');
-      skipBlockComment(First, End);
-      skipOverSpaces(First, End);
-      Last = First;
-    } while (Last != End && !isVerticalWhitespace(*Last));
-
-    // Print out the string.
-    const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
-    if (Last == End || LastBeforeTrailingSpace == First ||
-        LastBeforeTrailingSpace[-1] != '\\') {
-      append(First, LastBeforeTrailingSpace);
-      First = Last;
-      skipNewline(First, End);
-      return;
-    }
-
-    // Print up to the last character that's not a whitespace or backslash.
-    // Then print exactly one space, which matters when tokens are separated by
-    // a line continuation.
-    append(First, findLastNonSpaceNonBackslash(First, Last));
-    put(' ');
-
-    First = Last;
-    skipNewline(First, End);
-    skipOverSpaces(First, End);
-  }
-}
-
 static void skipWhitespace(const char *&First, const char *const End) {
   for (;;) {
     assert(First <= End);
@@ -489,176 +460,134 @@ static void skipWhitespace(const char *&First, const char *const End) {
   }
 }
 
-void Scanner::printAdjacentModuleNameParts(const char *&First,
-                                           const char *const End) {
-  // Skip over parts of the body.
-  const char *Last = First;
-  do
-    ++Last;
-  while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.'));
-  append(First, Last);
-  First = Last;
-}
-
-bool Scanner::printAtImportBody(const char *&First, const char *const End) {
+bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
+                                     const char *const End) {
+  const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
   for (;;) {
-    skipWhitespace(First, End);
-    if (First == End)
-      return true;
-
-    if (isVerticalWhitespace(*First)) {
-      skipNewline(First, End);
-      continue;
-    }
-
-    // Found a semicolon.
-    if (*First == ';') {
-      put(*First++).put('\n');
-      return false;
-    }
-
-    // Don't handle macro expansions inside @import for now.
-    if (!isAsciiIdentifierContinue(*First) && *First != '.')
-      return true;
-
-    printAdjacentModuleNameParts(First, End);
+    const dependency_directives_scan::Token &Tok = lexToken(First, End);
+    if (Tok.is(tok::eof))
+      return reportError(
+          DirectiveLoc,
+          diag::err_dep_source_scanner_missing_semi_after_at_import);
+    if (Tok.is(tok::semi))
+      break;
   }
+  pushDirective(Kind);
+  skipWhitespace(First, End);
+  if (First == End)
+    return false;
+  if (!isVerticalWhitespace(*First))
+    return reportError(
+        DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
+  skipNewline(First, End);
+  return false;
 }
 
-void Scanner::printDirectiveBody(const char *&First, const char *const End) {
-  skipWhitespace(First, End); // Skip initial whitespace.
-  printToNewline(First, End);
-  while (Out.back() == ' ')
-    Out.pop_back();
-  put('\n');
-}
+dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
+                                                     const char *const End) {
+  clang::Token Tok;
+  TheLexer.LexFromRawLexer(Tok);
+  First = Input.data() + TheLexer.getCurrentBufferOffset();
+  assert(First <= End);
 
-LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
-                                                   const char *const End) {
-  assert(isAsciiIdentifierContinue(*First) && "invalid identifer");
-  const char *Last = First + 1;
-  while (Last != End && isAsciiIdentifierContinue(*Last))
-    ++Last;
-  return Last;
+  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
+  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
+                          Tok.getFlags());
+  return CurDirToks.back();
 }
 
-LLVM_NODISCARD static const char *
-getIdentifierContinuation(const char *First, const char *const End) {
-  if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
-    return nullptr;
+dependency_directives_scan::Token &
+Scanner::lexIncludeFilename(const char *&First, const char *const End) {
+  clang::Token Tok;
+  TheLexer.LexIncludeFilename(Tok);
+  First = Input.data() + TheLexer.getCurrentBufferOffset();
+  assert(First <= End);
 
-  ++First;
-  skipNewline(First, End);
-  if (First == End)
-    return nullptr;
-  return isAsciiIdentifierContinue(First[0]) ? First : nullptr;
-}
-
-Scanner::IdInfo Scanner::lexIdentifier(const char *First,
-                                       const char *const End) {
-  const char *Last = lexRawIdentifier(First, End);
-  const char *Next = getIdentifierContinuation(Last, End);
-  if (LLVM_LIKELY(!Next))
-    return IdInfo{Last, StringRef(First, Last - First)};
-
-  // Slow path, where identifiers are split over lines.
-  SmallVector<char, 64> Id(First, Last);
-  while (Next) {
-    Last = lexRawIdentifier(Next, End);
-    Id.append(Next, Last);
-    Next = getIdentifierContinuation(Last, End);
-  }
-  return IdInfo{
-      Last,
-      SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
+  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
+                          Tok.getFlags());
+  return CurDirToks.back();
 }
 
-void Scanner::printAdjacentMacroArgs(const char *&First,
-                                     const char *const End) {
-  // Skip over parts of the body.
-  const char *Last = First;
-  do
-    ++Last;
-  while (Last != End &&
-         (isAsciiIdentifierContinue(*Last) || *Last == '.' || *Last == ','));
-  append(First, Last);
-  First = Last;
+void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
+  while (true) {
+    const dependency_directives_scan::Token &Tok = lexToken(First, End);
+    if (Tok.is(tok::eod))
+      break;
+  }
 }
 
-bool Scanner::printMacroArgs(const char *&First, const char *const End) {
-  assert(*First == '(');
-  put(*First++);
-  for (;;) {
-    skipWhitespace(First, End);
-    if (First == End)
-      return true;
+LLVM_NODISCARD Optional<StringRef>
+Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
+  const dependency_directives_scan::Token &Tok = lexToken(First, End);
+  if (Tok.isNot(tok::raw_identifier)) {
+    if (!Tok.is(tok::eod))
+      skipLine(First, End);
+    return None;
+  }
 
-    if (*First == ')') {
-      put(*First++);
-      return false;
-    }
+  bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
+  if (LLVM_LIKELY(!NeedsCleaning))
+    return Input.slice(Tok.Offset, Tok.getEnd());
 
-    // This is intentionally fairly liberal.
-    if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ','))
-      return true;
+  SmallString<64> Spelling;
+  Spelling.resize(Tok.Length);
 
-    printAdjacentMacroArgs(First, End);
+  unsigned SpellingLength = 0;
+  const char *BufPtr = Input.begin() + Tok.Offset;
+  const char *AfterIdent = Input.begin() + Tok.getEnd();
+  while (BufPtr < AfterIdent) {
+    unsigned Size;
+    Spelling[SpellingLength++] =
+        Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+    BufPtr += Size;
   }
+
+  return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
+      .first->first();
 }
 
-/// Looks for an identifier starting from Last.
-///
-/// Updates "First" to just past the next identifier, if any.  Returns true iff
-/// the identifier matches "Id".
-bool Scanner::isNextIdentifier(StringRef Id, const char *&First,
-                               const char *const End) {
-  skipWhitespace(First, End);
-  if (First == End || !isAsciiIdentifierStart(*First))
-    return false;
+StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
+  Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
+  assert(Id.hasValue() && "expected identifier token");
+  return Id.getValue();
+}
 
-  IdInfo FoundId = lexIdentifier(First, End);
-  First = FoundId.Last;
-  return FoundId.Name == Id;
+bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
+                                         const char *const End) {
+  if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) {
+    if (*FoundId == Id)
+      return true;
+    skipLine(First, End);
+  }
+  return false;
 }
 
 bool Scanner::lexAt(const char *&First, const char *const End) {
   // Handle "@import".
-  const char *ImportLoc = First++;
-  if (!isNextIdentifier("import", First, End)) {
-    skipLine(First, End);
-    return false;
-  }
-  pushDirective(decl_at_import);
-  append("@import ");
-  if (printAtImportBody(First, End))
-    return reportError(
-        ImportLoc, diag::err_dep_source_scanner_missing_semi_after_at_import);
-  skipWhitespace(First, End);
-  if (First == End)
+
+  // Lex '@'.
+  const dependency_directives_scan::Token &AtTok = lexToken(First, End);
+  assert(AtTok.is(tok::at));
+  (void)AtTok;
+
+  if (!isNextIdentifierOrSkipLine("import", First, End))
     return false;
-  if (!isVerticalWhitespace(*First))
-    return reportError(
-        ImportLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
-  skipNewline(First, End);
-  return false;
+  return lexModuleDirectiveBody(decl_at_import, First, End);
 }
 
 bool Scanner::lexModule(const char *&First, const char *const End) {
-  IdInfo Id = lexIdentifier(First, End);
-  First = Id.Last;
+  StringRef Id = lexIdentifier(First, End);
   bool Export = false;
-  if (Id.Name == "export") {
+  if (Id == "export") {
     Export = true;
-    skipWhitespace(First, End);
-    if (!isAsciiIdentifierContinue(*First)) {
-      skipLine(First, End);
+    Optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
+    if (!NextId)
       return false;
-    }
-    Id = lexIdentifier(First, End);
-    First = Id.Last;
+    Id = *NextId;
   }
 
-  if (Id.Name != "module" && Id.Name != "import") {
+  if (Id != "module" && Id != "import") {
     skipLine(First, End);
     return false;
   }
@@ -680,114 +609,51 @@ bool Scanner::lexModule(const char *&First, const char *const End) {
     }
   }
 
-  if (Export) {
-    pushDirective(cxx_export_decl);
-    append("export ");
-  }
+  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false);
 
-  if (Id.Name == "module")
-    pushDirective(cxx_module_decl);
+  DirectiveKind Kind;
+  if (Id == "module")
+    Kind = Export ? cxx_export_module_decl : cxx_module_decl;
   else
-    pushDirective(cxx_import_decl);
-  append(Id.Name);
-  append(" ");
-  printToNewline(First, End);
-  append("\n");
-  return false;
-}
-
-bool Scanner::lexDefine(const char *&First, const char *const End) {
-  pushDirective(pp_define);
-  append("#define ");
-  skipWhitespace(First, End);
+    Kind = Export ? cxx_export_import_decl : cxx_import_decl;
 
-  if (!isAsciiIdentifierStart(*First))
-    return reportError(First, diag::err_pp_macro_not_identifier);
-
-  IdInfo Id = lexIdentifier(First, End);
-  const char *Last = Id.Last;
-  append(Id.Name);
-  if (Last == End)
-    return false;
-  if (*Last == '(') {
-    size_t Size = Out.size();
-    if (printMacroArgs(Last, End)) {
-      // Be robust to bad macro arguments, since they can show up in disabled
-      // code.
-      Out.resize(Size);
-      append("(/* invalid */\n");
-      skipLine(Last, End);
-      return false;
-    }
-  }
-  skipWhitespace(Last, End);
-  if (Last == End)
-    return false;
-  if (!isVerticalWhitespace(*Last))
-    put(' ');
-  printDirectiveBody(Last, End);
-  First = Last;
-  return false;
+  return lexModuleDirectiveBody(Kind, First, End);
 }
 
 bool Scanner::lexPragma(const char *&First, const char *const End) {
-  // #pragma.
-  skipWhitespace(First, End);
-  if (First == End || !isAsciiIdentifierStart(*First))
+  Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
+  if (!FoundId)
     return false;
 
-  IdInfo FoundId = lexIdentifier(First, End);
-  First = FoundId.Last;
-  if (FoundId.Name == "once") {
-    // #pragma once
-    skipLine(First, End);
-    pushDirective(pp_pragma_once);
-    append("#pragma once\n");
-    return false;
-  }
-  if (FoundId.Name == "push_macro") {
-    // #pragma push_macro
-    pushDirective(pp_pragma_push_macro);
-    append("#pragma push_macro");
-    printDirectiveBody(First, End);
-    return false;
-  }
-  if (FoundId.Name == "pop_macro") {
-    // #pragma pop_macro
-    pushDirective(pp_pragma_pop_macro);
-    append("#pragma pop_macro");
-    printDirectiveBody(First, End);
-    return false;
-  }
-  if (FoundId.Name == "include_alias") {
-    // #pragma include_alias
-    pushDirective(pp_pragma_include_alias);
-    append("#pragma include_alias");
-    printDirectiveBody(First, End);
+  StringRef Id = FoundId.getValue();
+  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
+                  .Case("once", pp_pragma_once)
+                  .Case("push_macro", pp_pragma_push_macro)
+                  .Case("pop_macro", pp_pragma_pop_macro)
+                  .Case("include_alias", pp_pragma_include_alias)
+                  .Default(pp_none);
+  if (Kind != pp_none) {
+    lexPPDirectiveBody(First, End);
+    pushDirective(Kind);
     return false;
   }
 
-  if (FoundId.Name != "clang") {
+  if (Id != "clang") {
     skipLine(First, End);
     return false;
   }
 
   // #pragma clang.
-  if (!isNextIdentifier("module", First, End)) {
-    skipLine(First, End);
+  if (!isNextIdentifierOrSkipLine("module", First, End))
     return false;
-  }
 
   // #pragma clang module.
-  if (!isNextIdentifier("import", First, End)) {
-    skipLine(First, End);
+  if (!isNextIdentifierOrSkipLine("import", First, End))
     return false;
-  }
 
   // #pragma clang module import.
+  lexPPDirectiveBody(First, End);
   pushDirective(pp_pragma_import);
-  append("#pragma clang module import ");
-  printDirectiveBody(First, End);
   return false;
 }
 
@@ -808,14 +674,13 @@ bool Scanner::lexEndif(const char *&First, const char *const End) {
     return false;
   }
 
-  return lexDefault(pp_endif, "endif", First, End);
+  return lexDefault(pp_endif, First, End);
 }
 
-bool Scanner::lexDefault(DirectiveKind Kind, StringRef Directive,
-                         const char *&First, const char *const End) {
+bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
+                         const char *const End) {
+  lexPPDirectiveBody(First, End);
   pushDirective(Kind);
-  put('#').append(Directive).put(' ');
-  printDirectiveBody(First, End);
   return false;
 }
 
@@ -845,6 +710,14 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
     return false;
   }
 
+  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);
+
+  auto ScEx1 = make_scope_exit([&]() {
+    /// Clear Scanner's CurDirToks before returning, in case we didn't push a
+    /// new directive.
+    CurDirToks.clear();
+  });
+
   // Handle "@import".
   if (*First == '@')
     return lexAt(First, End);
@@ -853,25 +726,26 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
     return lexModule(First, End);
 
   // Handle preprocessing directives.
-  ++First; // Skip over '#'.
-  skipWhitespace(First, End);
 
-  if (First == End)
-    return reportError(First, diag::err_pp_expected_eol);
+  TheLexer.setParsingPreprocessorDirective(true);
+  auto ScEx2 = make_scope_exit(
+      [&]() { TheLexer.setParsingPreprocessorDirective(false); });
 
-  if (!isAsciiIdentifierStart(*First)) {
-    skipLine(First, End);
+  // Lex '#'.
+  const dependency_directives_scan::Token &HashTok = lexToken(First, End);
+  assert(HashTok.is(tok::hash));
+  (void)HashTok;
+
+  Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
+  if (!FoundId)
     return false;
-  }
 
-  // Figure out the token.
-  IdInfo Id = lexIdentifier(First, End);
-  First = Id.Last;
+  StringRef Id = FoundId.getValue();
 
-  if (Id.Name == "pragma")
+  if (Id == "pragma")
     return lexPragma(First, End);
 
-  auto Kind = llvm::StringSwitch<DirectiveKind>(Id.Name)
+  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                   .Case("include", pp_include)
                   .Case("__include_macros", pp___include_macros)
                   .Case("define", pp_define)
@@ -888,18 +762,26 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
                   .Case("endif", pp_endif)
                   .Default(pp_none);
   if (Kind == pp_none) {
-    skipDirective(Id.Name, First, End);
+    skipDirective(Id, First, End);
     return false;
   }
 
   if (Kind == pp_endif)
     return lexEndif(First, End);
 
-  if (Kind == pp_define)
-    return lexDefine(First, End);
+  switch (Kind) {
+  case pp_include:
+  case pp___include_macros:
+  case pp_include_next:
+  case pp_import:
+    lexIncludeFilename(First, End);
+    break;
+  default:
+    break;
+  }
 
   // Everything else.
-  return lexDefault(Kind, Id.Name, First, End);
+  return lexDefault(Kind, First, End);
 }
 
 static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
@@ -916,78 +798,65 @@ bool Scanner::scanImpl(const char *First, const char *const End) {
   return false;
 }
 
-bool Scanner::scan() {
+bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
   bool Error = scanImpl(Input.begin(), Input.end());
 
   if (!Error) {
-    // Add a trailing newline and an EOF on success.
-    if (!Out.empty() && Out.back() != '\n')
-      Out.push_back('\n');
+    // Add an EOF on success.
     pushDirective(pp_eof);
   }
 
-  // Null-terminate the output. This way the memory buffer that's passed to
-  // Clang will not have to worry about the terminating '\0'.
-  Out.push_back(0);
-  Out.pop_back();
+  ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
+  for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
+    assert(RemainingTokens.size() >= DirWithToks.NumTokens);
+    Directives.emplace_back(DirWithToks.Kind,
+                            RemainingTokens.take_front(DirWithToks.NumTokens));
+    RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
+  }
+  assert(RemainingTokens.empty());
+
   return Error;
 }
 
-bool clang::dependency_directives_scan::computeSkippedRanges(
-    ArrayRef<Directive> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
-  struct IfElseDirective {
-    enum DirectiveKind {
-      If,  // if/ifdef/ifndef
-      Else // elif/elifdef/elifndef, else
-    };
-    int Offset;
-    DirectiveKind Kind;
+bool clang::scanSourceForDependencyDirectives(
+    StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+    SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
+    SourceLocation InputSourceLoc) {
+  return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
+}
+
+void clang::printDependencyDirectivesAsSource(
+    StringRef Source,
+    ArrayRef<dependency_directives_scan::Directive> Directives,
+    llvm::raw_ostream &OS) {
+  // Add a space separator where it is convenient for testing purposes.
+  auto needsSpaceSeparator =
+      [](tok::TokenKind Prev,
+         const dependency_directives_scan::Token &Tok) -> bool {
+    if (Prev == Tok.Kind)
+      return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
+                          tok::r_square);
+    if (Prev == tok::raw_identifier &&
+        Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
+                    tok::char_constant, tok::header_name))
+      return true;
+    if (Prev == tok::r_paren &&
+        Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
+                    tok::char_constant, tok::unknown))
+      return true;
+    if (Prev == tok::comma &&
+        Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
+      return true;
+    return false;
   };
-  llvm::SmallVector<IfElseDirective, 32> Offsets;
-  for (const Directive &T : Input) {
-    switch (T.Kind) {
-    case pp_if:
-    case pp_ifdef:
-    case pp_ifndef:
-      Offsets.push_back({T.Offset, IfElseDirective::If});
-      break;
-
-    case pp_elif:
-    case pp_elifdef:
-    case pp_elifndef:
-    case pp_else: {
-      if (Offsets.empty())
-        return true;
-      int PreviousOffset = Offsets.back().Offset;
-      Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
-      Offsets.push_back({T.Offset, IfElseDirective::Else});
-      break;
-    }
 
-    case pp_endif: {
-      if (Offsets.empty())
-        return true;
-      int PreviousOffset = Offsets.back().Offset;
-      Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
-      do {
-        IfElseDirective::DirectiveKind Kind = Offsets.pop_back_val().Kind;
-        if (Kind == IfElseDirective::If)
-          break;
-      } while (!Offsets.empty());
-      break;
-    }
-    default:
-      break;
+  for (const dependency_directives_scan::Directive &Directive : Directives) {
+    Optional<tok::TokenKind> PrevTokenKind;
+    for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
+      if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
+        OS << ' ';
+      PrevTokenKind = Tok.Kind;
+      OS << Source.slice(Tok.Offset, Tok.getEnd());
     }
   }
-  return false;
-}
-
-bool clang::scanSourceForDependencyDirectives(
-    StringRef Input, SmallVectorImpl<char> &Output,
-    SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
-    SourceLocation InputSourceLoc) {
-  Output.clear();
-  Directives.clear();
-  return Scanner(Output, Directives, Input, Diags, InputSourceLoc).scan();
 }

diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index a85b1186359e4..a0a7a6ae789b4 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -226,13 +226,11 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
   return L;
 }
 
-bool Lexer::skipOver(unsigned NumBytes) {
-  IsAtPhysicalStartOfLine = true;
-  IsAtStartOfLine = true;
-  if ((BufferPtr + NumBytes) > BufferEnd)
-    return true;
-  BufferPtr += NumBytes;
-  return false;
+void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) {
+  this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
+  this->IsAtStartOfLine = IsAtStartOfLine;
+  assert((BufferStart + Offset) <= BufferEnd);
+  BufferPtr = BufferStart + Offset;
 }
 
 template <typename T> static void StringifyImpl(T &Str, char Quote) {
@@ -2939,6 +2937,13 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
 unsigned Lexer::isNextPPTokenLParen() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
 
+  if (isDependencyDirectivesLexer()) {
+    if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
+      return 2;
+    return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
+        tok::l_paren);
+  }
+
   // Switch to 'skipping' mode.  This will ensure that we can lex a token
   // without emitting diagnostics, disables macro expansion, and will cause EOF
   // to return an EOF token instead of popping the include stack.
@@ -3281,6 +3286,8 @@ void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
 }
 
 bool Lexer::Lex(Token &Result) {
+  assert(!isDependencyDirectivesLexer());
+
   // Start a new token.
   Result.startToken();
 
@@ -4102,3 +4109,129 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
   // We parsed the directive; lex a token with the new state.
   return false;
 }
+
+const char *Lexer::convertDependencyDirectiveToken(
+    const dependency_directives_scan::Token &DDTok, Token &Result) {
+  const char *TokPtr = BufferStart + DDTok.Offset;
+  Result.startToken();
+  Result.setLocation(getSourceLocation(TokPtr));
+  Result.setKind(DDTok.Kind);
+  Result.setFlag((Token::TokenFlags)DDTok.Flags);
+  Result.setLength(DDTok.Length);
+  BufferPtr = TokPtr + DDTok.Length;
+  return TokPtr;
+}
+
+bool Lexer::LexDependencyDirectiveToken(Token &Result) {
+  assert(isDependencyDirectivesLexer());
+
+  using namespace dependency_directives_scan;
+
+  while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
+    if (DepDirectives.front().Kind == pp_eof)
+      return LexEndOfFile(Result, BufferEnd);
+    NextDepDirectiveTokenIndex = 0;
+    DepDirectives = DepDirectives.drop_front();
+  }
+
+  const dependency_directives_scan::Token &DDTok =
+      DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
+
+  const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result);
+
+  if (Result.is(tok::hash) && Result.isAtStartOfLine()) {
+    PP->HandleDirective(Result);
+    return false;
+  }
+  if (Result.is(tok::raw_identifier)) {
+    Result.setRawIdentifierData(TokPtr);
+    if (!isLexingRawMode()) {
+      IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+      if (II->isHandleIdentifierCase())
+        return PP->HandleIdentifier(Result);
+    }
+    return true;
+  }
+  if (Result.isLiteral()) {
+    Result.setLiteralData(TokPtr);
+    return true;
+  }
+  if (Result.is(tok::colon) &&
+      (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) {
+    // Convert consecutive colons to 'tok::coloncolon'.
+    if (*BufferPtr == ':') {
+      assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
+          tok::colon));
+      ++NextDepDirectiveTokenIndex;
+      Result.setKind(tok::coloncolon);
+    }
+    return true;
+  }
+  if (Result.is(tok::eod))
+    ParsingPreprocessorDirective = false;
+
+  return true;
+}
+
+bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
+  assert(isDependencyDirectivesLexer());
+
+  using namespace dependency_directives_scan;
+
+  bool Stop = false;
+  unsigned NestedIfs = 0;
+  do {
+    DepDirectives = DepDirectives.drop_front();
+    switch (DepDirectives.front().Kind) {
+    case pp_none:
+      llvm_unreachable("unexpected 'pp_none'");
+    case pp_include:
+    case pp___include_macros:
+    case pp_define:
+    case pp_undef:
+    case pp_import:
+    case pp_pragma_import:
+    case pp_pragma_once:
+    case pp_pragma_push_macro:
+    case pp_pragma_pop_macro:
+    case pp_pragma_include_alias:
+    case pp_include_next:
+    case decl_at_import:
+    case cxx_module_decl:
+    case cxx_import_decl:
+    case cxx_export_module_decl:
+    case cxx_export_import_decl:
+      break;
+    case pp_if:
+    case pp_ifdef:
+    case pp_ifndef:
+      ++NestedIfs;
+      break;
+    case pp_elif:
+    case pp_elifdef:
+    case pp_elifndef:
+    case pp_else:
+      if (!NestedIfs) {
+        Stop = true;
+      }
+      break;
+    case pp_endif:
+      if (!NestedIfs) {
+        Stop = true;
+      } else {
+        --NestedIfs;
+      }
+      break;
+    case pp_eof:
+      return LexEndOfFile(Result, BufferEnd);
+    }
+  } while (!Stop);
+
+  const dependency_directives_scan::Token &DDTok =
+      DepDirectives.front().Tokens.front();
+  assert(DDTok.is(tok::hash));
+  NextDepDirectiveTokenIndex = 1;
+
+  convertDependencyDirectiveToken(DDTok, Result);
+  return false;
+}

diff  --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index d947c1580f5ca..97d7466d79a19 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -443,41 +443,6 @@ SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
   return DiscardUntilEndOfDirective().getEnd();
 }
 
-Optional<unsigned> Preprocessor::getSkippedRangeForExcludedConditionalBlock(
-    SourceLocation HashLoc) {
-  if (!ExcludedConditionalDirectiveSkipMappings)
-    return None;
-  if (!HashLoc.isFileID())
-    return None;
-
-  std::pair<FileID, unsigned> HashFileOffset =
-      SourceMgr.getDecomposedLoc(HashLoc);
-  Optional<llvm::MemoryBufferRef> Buf =
-      SourceMgr.getBufferOrNone(HashFileOffset.first);
-  if (!Buf)
-    return None;
-  auto It =
-      ExcludedConditionalDirectiveSkipMappings->find(Buf->getBufferStart());
-  if (It == ExcludedConditionalDirectiveSkipMappings->end())
-    return None;
-
-  const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond();
-  // Check if the offset of '#' is mapped in the skipped ranges.
-  auto MappingIt = SkippedRanges.find(HashFileOffset.second);
-  if (MappingIt == SkippedRanges.end())
-    return None;
-
-  unsigned BytesToSkip = MappingIt->getSecond();
-  unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset();
-  assert(CurLexerBufferOffset >= HashFileOffset.second &&
-         "lexer is before the hash?");
-  // Take into account the fact that the lexer has already advanced, so the
-  // number of bytes to skip must be adjusted.
-  unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second;
-  assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?");
-  return BytesToSkip - LengthDiff;
-}
-
 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
                                           StringRef Directive,
                                           const SourceLocation &EndLoc) const {
@@ -527,36 +492,42 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
   // disabling warnings, etc.
   CurPPLexer->LexingRawMode = true;
   Token Tok;
-  if (auto SkipLength =
-          getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) {
-    // Skip to the next '#endif' / '#else' / '#elif'.
-    CurLexer->skipOver(*SkipLength);
-  }
   SourceLocation endLoc;
   while (true) {
-    CurLexer->Lex(Tok);
+    if (CurLexer->isDependencyDirectivesLexer()) {
+      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
+    } else {
+      while (true) {
+        CurLexer->Lex(Tok);
 
-    if (Tok.is(tok::code_completion)) {
-      setCodeCompletionReached();
-      if (CodeComplete)
-        CodeComplete->CodeCompleteInConditionalExclusion();
-      continue;
-    }
+        if (Tok.is(tok::code_completion)) {
+          setCodeCompletionReached();
+          if (CodeComplete)
+            CodeComplete->CodeCompleteInConditionalExclusion();
+          continue;
+        }
 
-    // If this is the end of the buffer, we have an error.
-    if (Tok.is(tok::eof)) {
-      // We don't emit errors for unterminated conditionals here,
-      // Lexer::LexEndOfFile can do that properly.
-      // Just return and let the caller lex after this #include.
-      if (PreambleConditionalStack.isRecording())
-        PreambleConditionalStack.SkipInfo.emplace(
-            HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
-      break;
-    }
+        // If this is the end of the buffer, we have an error.
+        if (Tok.is(tok::eof)) {
+          // We don't emit errors for unterminated conditionals here,
+          // Lexer::LexEndOfFile can do that properly.
+          // Just return and let the caller lex after this #include.
+          if (PreambleConditionalStack.isRecording())
+            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
+                                                      FoundNonSkipPortion,
+                                                      FoundElse, ElseLoc);
+          break;
+        }
 
-    // If this token is not a preprocessor directive, just skip it.
-    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
-      continue;
+        // If this token is not a preprocessor directive, just skip it.
+        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+          continue;
+
+        break;
+      }
+    }
+    if (Tok.is(tok::eof))
+      break;
 
     // We just parsed a # character at the start of a line, so we're in
     // directive mode.  Tell the lexer this so any newlines we see will be

diff  --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index e6a7eb6a03506..e3379aecba725 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -91,8 +91,19 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir,
         CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
   }
 
-  EnterSourceFileWithLexer(
-      new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir);
+  Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile);
+  if (getPreprocessorOpts().DependencyDirectivesForFile &&
+      FID != PredefinesFileID) {
+    if (Optional<FileEntryRef> File = SourceMgr.getFileEntryRefForID(FID)) {
+      if (Optional<ArrayRef<dependency_directives_scan::Directive>>
+              DepDirectives =
+                  getPreprocessorOpts().DependencyDirectivesForFile(*File)) {
+        TheLexer->DepDirectives = *DepDirectives;
+      }
+    }
+  }
+
+  EnterSourceFileWithLexer(TheLexer, CurDir);
   return false;
 }
 
@@ -110,7 +121,9 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
   CurDirLookup = CurDir;
   CurLexerSubmodule = nullptr;
   if (CurLexerKind != CLK_LexAfterModuleImport)
-    CurLexerKind = CLK_Lexer;
+    CurLexerKind = TheLexer->isDependencyDirectivesLexer()
+                       ? CLK_DependencyDirectivesLexer
+                       : CLK_Lexer;
 
   // Notify the client, if desired, that we are in a new source file.
   if (Callbacks && !CurLexer->Is_PragmaLexer) {

diff  --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index abbd33331e9d3..d077616eb84bc 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -158,11 +158,6 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
   if (this->PPOpts->GeneratePreamble)
     PreambleConditionalStack.startRecording();
 
-  ExcludedConditionalDirectiveSkipMappings =
-      this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
-  if (ExcludedConditionalDirectiveSkipMappings)
-    ExcludedConditionalDirectiveSkipMappings->clear();
-
   MaxTokens = LangOpts.MaxTokens;
 }
 
@@ -382,7 +377,9 @@ StringRef Preprocessor::getLastMacroWithSpelling(
 
 void Preprocessor::recomputeCurLexerKind() {
   if (CurLexer)
-    CurLexerKind = CLK_Lexer;
+    CurLexerKind = CurLexer->isDependencyDirectivesLexer()
+                       ? CLK_DependencyDirectivesLexer
+                       : CLK_Lexer;
   else if (CurTokenLexer)
     CurLexerKind = CLK_TokenLexer;
   else
@@ -645,6 +642,9 @@ void Preprocessor::SkipTokensWhileUsingPCH() {
     case CLK_CachingLexer:
       CachingLex(Tok);
       break;
+    case CLK_DependencyDirectivesLexer:
+      CurLexer->LexDependencyDirectiveToken(Tok);
+      break;
     case CLK_LexAfterModuleImport:
       LexAfterModuleImport(Tok);
       break;
@@ -906,6 +906,9 @@ void Preprocessor::Lex(Token &Result) {
       CachingLex(Result);
       ReturnedToken = true;
       break;
+    case CLK_DependencyDirectivesLexer:
+      ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result);
+      break;
     case CLK_LexAfterModuleImport:
       ReturnedToken = LexAfterModuleImport(Result);
       break;

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index c47f7d068eb7f..026bdfe03f28c 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
-#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SmallVectorMemoryBuffer.h"
 #include "llvm/Support/Threading.h"
@@ -44,64 +43,41 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
 EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
     const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
   if (Entry.isError() || Entry.isDirectory() || Disable ||
-      !shouldScanForDirectives(Filename, Entry.getUniqueID()))
-    return EntryRef(/*Minimized=*/false, Filename, Entry);
+      !shouldScanForDirectives(Filename))
+    return EntryRef(Filename, Entry);
 
   CachedFileContents *Contents = Entry.getCachedContents();
   assert(Contents && "contents not initialized");
 
   // Double-checked locking.
-  if (Contents->MinimizedAccess.load())
-    return EntryRef(/*Minimized=*/true, Filename, Entry);
+  if (Contents->DepDirectives.load())
+    return EntryRef(Filename, Entry);
 
   std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
 
   // Double-checked locking.
-  if (Contents->MinimizedAccess.load())
-    return EntryRef(/*Minimized=*/true, Filename, Entry);
+  if (Contents->DepDirectives.load())
+    return EntryRef(Filename, Entry);
 
-  llvm::SmallString<1024> MinimizedFileContents;
-  // Minimize the file down to directives that might affect the dependencies.
-  SmallVector<dependency_directives_scan::Directive, 64> Tokens;
+  SmallVector<dependency_directives_scan::Directive, 64> Directives;
+  // Scan the file for preprocessor directives that might affect the
+  // dependencies.
   if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
-                                        MinimizedFileContents, Tokens)) {
+                                        Contents->DepDirectiveTokens,
+                                        Directives)) {
+    Contents->DepDirectiveTokens.clear();
     // FIXME: Propagate the diagnostic if desired by the client.
-    // Use the original file if the minimization failed.
-    Contents->MinimizedStorage =
-        llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
-    Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
-    return EntryRef(/*Minimized=*/true, Filename, Entry);
+    Contents->DepDirectives.store(new Optional<DependencyDirectivesTy>());
+    return EntryRef(Filename, Entry);
   }
 
-  // The contents produced by the minimizer must be null terminated.
-  assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
-         "not null terminated contents");
-
-  // Compute the skipped PP ranges that speedup skipping over inactive
-  // preprocessor blocks.
-  llvm::SmallVector<dependency_directives_scan::SkippedRange, 32> SkippedRanges;
-  dependency_directives_scan::computeSkippedRanges(Tokens, SkippedRanges);
-  PreprocessorSkippedRangeMapping Mapping;
-  for (const auto &Range : SkippedRanges) {
-    if (Range.Length < 16) {
-      // Ignore small ranges as non-profitable.
-      // FIXME: This is a heuristic, its worth investigating the tradeoffs
-      // when it should be applied.
-      continue;
-    }
-    Mapping[Range.Offset] = Range.Length;
-  }
-  Contents->PPSkippedRangeMapping = std::move(Mapping);
-
-  Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
-      std::move(MinimizedFileContents));
-  // This function performed double-checked locking using `MinimizedAccess`.
-  // Assigning it must be the last thing this function does. If we were to
-  // assign it before `PPSkippedRangeMapping`, other threads may skip the
-  // critical section (`MinimizedAccess != nullptr`) and access the mappings
-  // that are about to be initialized, leading to a data race.
-  Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
-  return EntryRef(/*Minimized=*/true, Filename, Entry);
+  // This function performed double-checked locking using `DepDirectives`.
+  // Assigning it must be the last thing this function does, otherwise other
+  // threads may skip the critical section (`DepDirectives != nullptr`),
+  // leading to a data race.
+  Contents->DepDirectives.store(
+      new Optional<DependencyDirectivesTy>(std::move(Directives)));
+  return EntryRef(Filename, Entry);
 }
 
 DependencyScanningFilesystemSharedCache::
@@ -208,19 +184,9 @@ static bool shouldCacheStatFailures(StringRef Filename) {
   return shouldScanForDirectivesBasedOnExtension(Filename);
 }
 
-void DependencyScanningWorkerFilesystem::disableDirectivesScanning(
-    StringRef Filename) {
-  // Since we're not done setting up `NotToBeScanned` yet, we need to disable
-  // directive scanning explicitly.
-  if (llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(
-          Filename, /*DisableDirectivesScanning=*/true))
-    NotToBeScanned.insert(Result->getStatus().getUniqueID());
-}
-
 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
-    StringRef Filename, llvm::sys::fs::UniqueID UID) {
-  return shouldScanForDirectivesBasedOnExtension(Filename) &&
-         !NotToBeScanned.contains(UID);
+    StringRef Filename) {
+  return shouldScanForDirectivesBasedOnExtension(Filename);
 }
 
 const CachedFileSystemEntry &
@@ -307,9 +273,7 @@ class DepScanFile final : public llvm::vfs::File {
               llvm::vfs::Status Stat)
       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
 
-  static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
-  create(EntryRef Entry,
-         ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings);
+  static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
 
   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
 
@@ -329,8 +293,7 @@ class DepScanFile final : public llvm::vfs::File {
 } // end anonymous namespace
 
 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
-DepScanFile::create(EntryRef Entry,
-                    ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) {
+DepScanFile::create(EntryRef Entry) {
   assert(!Entry.isError() && "error");
 
   if (Entry.isDirectory())
@@ -342,10 +305,6 @@ DepScanFile::create(EntryRef Entry,
                                        /*RequiresNullTerminator=*/false),
       Entry.getStatus());
 
-  const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping();
-  if (EntrySkipMappings && !EntrySkipMappings->empty())
-    PPSkipMappings[Result->Buffer->getBufferStart()] = EntrySkipMappings;
-
   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
 }
@@ -358,5 +317,5 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
   if (!Result)
     return Result.getError();
-  return DepScanFile::create(Result.get(), PPSkipMappings);
+  return DepScanFile::create(Result.get());
 }

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 30f5aeae118a6..04f7044bc4236 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -137,12 +137,11 @@ class DependencyScanningAction : public tooling::ToolAction {
   DependencyScanningAction(
       StringRef WorkingDirectory, DependencyConsumer &Consumer,
       llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
-      ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings,
       ScanningOutputFormat Format, bool OptimizeArgs,
       llvm::Optional<StringRef> ModuleName = None)
       : WorkingDirectory(WorkingDirectory), Consumer(Consumer),
-        DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format),
-        OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {}
+        DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs),
+        ModuleName(ModuleName) {}
 
   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
                      FileManager *FileMgr,
@@ -183,29 +182,21 @@ class DependencyScanningAction : public tooling::ToolAction {
 
     // Use the dependency scanning optimized file system if requested to do so.
     if (DepFS) {
-      DepFS->enableDirectivesScanningOfAllFiles();
-      // Don't minimize any files that contributed to prebuilt modules. The
-      // implicit build validates the modules by comparing the reported sizes of
-      // their inputs to the current state of the filesystem. Minimization would
-      // throw this mechanism off.
-      for (const auto &File : PrebuiltModulesInputFiles)
-        DepFS->disableDirectivesScanning(File.getKey());
-      // Don't minimize any files that were explicitly passed in the build
-      // settings and that might be opened.
-      for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries)
-        DepFS->disableDirectivesScanning(E.Path);
-      for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles)
-        DepFS->disableDirectivesScanning(F);
-
       // Support for virtual file system overlays on top of the caching
       // filesystem.
       FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
           ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS));
 
-      // Pass the skip mappings which should speed up excluded conditional block
-      // skipping in the preprocessor.
-      ScanInstance.getPreprocessorOpts()
-          .ExcludedConditionalDirectiveSkipMappings = &PPSkipMappings;
+      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> LocalDepFS =
+          DepFS;
+      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
+          [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File)
+          -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+        if (llvm::ErrorOr<EntryRef> Entry =
+                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
+          return Entry->getDirectiveTokens();
+        return None;
+      };
     }
 
     // Create the dependency collector that will collect the produced
@@ -262,7 +253,6 @@ class DependencyScanningAction : public tooling::ToolAction {
   StringRef WorkingDirectory;
   DependencyConsumer &Consumer;
   llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
-  ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings;
   ScanningOutputFormat Format;
   bool OptimizeArgs;
   llvm::Optional<StringRef> ModuleName;
@@ -289,7 +279,7 @@ DependencyScanningWorker::DependencyScanningWorker(
 
   if (Service.getMode() == ScanningMode::DependencyDirectivesScan)
     DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(),
-                                                   RealFS, PPSkipMappings);
+                                                   RealFS);
   if (Service.canReuseFileManager())
     Files = new FileManager(FileSystemOptions(), RealFS);
 }
@@ -340,8 +330,8 @@ llvm::Error DependencyScanningWorker::computeDependencies(
   return runWithDiags(CreateAndPopulateDiagOpts(FinalCCommandLine).release(),
                       [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) {
                         DependencyScanningAction Action(
-                            WorkingDirectory, Consumer, DepFS, PPSkipMappings,
-                            Format, OptimizeArgs, ModuleName);
+                            WorkingDirectory, Consumer, DepFS, Format,
+                            OptimizeArgs, ModuleName);
                         // Create an invocation that uses the underlying file
                         // system to ensure that any file system requests that
                         // are made by the driver do not go through the

diff  --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
index fa4ff7dcb8bb8..cb4525e38711c 100644
--- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
@@ -1,3 +1,4 @@
-// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s
 
-#define 0 0 // expected-error {{macro name must be an identifier}}
+#define 0 0
+// CHECK: #define 0 0

diff  --git a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
index 98b1cc88e7c18..0971649caf673 100644
--- a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
@@ -15,7 +15,7 @@
 #pragma include_alias(<string>,   "mystring.h")
 
 // CHECK:      #pragma once
-// CHECK-NEXT: #pragma push_macro( "MYMACRO" )
+// CHECK-NEXT: #pragma push_macro("MYMACRO")
 // CHECK-NEXT: #pragma pop_macro("MYMACRO")
 // CHECK-NEXT: #pragma clang module import mymodule
 // CHECK-NEXT: #pragma include_alias(<string>, "mystring.h")

diff  --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index cec28eae0f2b8..8c57539852830 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -14,39 +14,58 @@ using namespace llvm;
 using namespace clang;
 using namespace clang::dependency_directives_scan;
 
-static bool minimizeSourceToDependencyDirectives(StringRef Input,
-                                                 SmallVectorImpl<char> &Out) {
-  SmallVector<dependency_directives_scan::Directive, 32> Directives;
-  return scanSourceForDependencyDirectives(Input, Out, Directives);
+static bool minimizeSourceToDependencyDirectives(
+    StringRef Input, SmallVectorImpl<char> &Out,
+    SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+    SmallVectorImpl<Directive> &Directives) {
+  Out.clear();
+  Tokens.clear();
+  Directives.clear();
+  if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
+    return true;
+
+  raw_svector_ostream OS(Out);
+  printDependencyDirectivesAsSource(Input, Directives, OS);
+  if (!Out.empty() && Out.back() != '\n')
+    Out.push_back('\n');
+  Out.push_back('\0');
+  Out.pop_back();
+
+  return false;
 }
 
-static bool
-minimizeSourceToDependencyDirectives(StringRef Input,
-                                     SmallVectorImpl<char> &Out,
-                                     SmallVectorImpl<Directive> &Directives) {
-  return scanSourceForDependencyDirectives(Input, Out, Directives);
+static bool minimizeSourceToDependencyDirectives(StringRef Input,
+                                                 SmallVectorImpl<char> &Out) {
+  SmallVector<dependency_directives_scan::Token, 16> Tokens;
+  SmallVector<Directive, 32> Directives;
+  return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives);
 }
 
 namespace {
 
 TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
   SmallVector<char, 128> Out;
+  SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
 
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Directives));
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("", Out, Tokens, Directives));
   EXPECT_TRUE(Out.empty());
+  EXPECT_TRUE(Tokens.empty());
   ASSERT_EQ(1u, Directives.size());
   ASSERT_EQ(pp_eof, Directives.back().Kind);
 
-  ASSERT_FALSE(
-      minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Directives));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
+                                                    Directives));
   EXPECT_TRUE(Out.empty());
+  EXPECT_TRUE(Tokens.empty());
   ASSERT_EQ(1u, Directives.size());
   ASSERT_EQ(pp_eof, Directives.back().Kind);
 }
 
-TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) {
+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
   SmallVector<char, 128> Out;
+  SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
 
   ASSERT_FALSE(
@@ -71,7 +90,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) {
                                            "#pragma include_alias(<A>, <B>)\n"
                                            "export module m;\n"
                                            "import m;\n",
-                                           Out, Directives));
+                                           Out, Tokens, Directives));
   EXPECT_EQ(pp_define, Directives[0].Kind);
   EXPECT_EQ(pp_undef, Directives[1].Kind);
   EXPECT_EQ(pp_endif, Directives[2].Kind);
@@ -91,19 +110,28 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) {
   EXPECT_EQ(pp_pragma_push_macro, Directives[16].Kind);
   EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind);
   EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind);
-  EXPECT_EQ(cxx_export_decl, Directives[19].Kind);
-  EXPECT_EQ(cxx_module_decl, Directives[20].Kind);
-  EXPECT_EQ(cxx_import_decl, Directives[21].Kind);
-  EXPECT_EQ(pp_eof, Directives[22].Kind);
+  EXPECT_EQ(cxx_export_module_decl, Directives[19].Kind);
+  EXPECT_EQ(cxx_import_decl, Directives[20].Kind);
+  EXPECT_EQ(pp_eof, Directives[21].Kind);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyHash) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#\n#define MACRO a\n", Out));
+  EXPECT_STREQ("#define MACRO a\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
   SmallVector<char, 128> Out;
+  SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
 
-  ASSERT_FALSE(
-      minimizeSourceToDependencyDirectives("#define MACRO", Out, Directives));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out,
+                                                    Tokens, Directives));
   EXPECT_STREQ("#define MACRO\n", Out.data());
+  ASSERT_EQ(4u, Tokens.size());
   ASSERT_EQ(2u, Directives.size());
   ASSERT_EQ(pp_define, Directives.front().Kind);
 }
@@ -144,25 +172,25 @@ TEST(MinimizeSourceToDependencyDirectivesTest, DefineMacroArguments) {
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#define MACRO   con  tent   ", Out));
-  EXPECT_STREQ("#define MACRO con  tent\n", Out.data());
+  EXPECT_STREQ("#define MACRO con tent\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#define MACRO()   con  tent   ", Out));
-  EXPECT_STREQ("#define MACRO() con  tent\n", Out.data());
+  EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) {
   SmallVector<char, 128> Out;
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out));
-  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+  EXPECT_STREQ("#define MACRO((a))\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out));
-  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+  EXPECT_STREQ("#define MACRO(\n", Out.data());
 
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out));
-  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+  EXPECT_STREQ("#define MACRO(a*b)\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
@@ -170,19 +198,19 @@ TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#define MACRO(\t)\tcon \t tent\t", Out));
-  EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data());
+  EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#define MACRO(\f)\fcon \f tent\f", Out));
-  EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data());
+  EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#define MACRO(\v)\vcon \v tent\v", Out));
-  EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data());
+  EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out));
-  EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data());
+  EXPECT_STREQ("#define MACRO con tent\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) {
@@ -255,25 +283,27 @@ TEST(MinimizeSourceToDependencyDirectivesTest,
 TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) {
   SmallVector<char, 128> Out;
 
-  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) {
   SmallVector<char, 128> Out;
 
-  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define &\n", Out));
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) {
   SmallVector<char, 128> Out;
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out));
-  EXPECT_STREQ("#define AND &\n", Out.data());
+  EXPECT_STREQ("#define AND&\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n"
                                                     "&\n",
                                                     Out));
-  EXPECT_STREQ("#define AND &\n", Out.data());
+  EXPECT_STREQ("#define AND\\\n"
+               "&\n",
+               Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) {
@@ -303,6 +333,14 @@ TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) {
                Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, CommentSlashSlashStar) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO 1 //* blah */\n", Out));
+  EXPECT_STREQ("#define MACRO 1\n", Out.data());
+}
+
 TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) {
   SmallVector<char, 128> Out;
 
@@ -481,6 +519,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Include) {
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("#__include_macros <A>\n", Out));
   EXPECT_STREQ("#__include_macros <A>\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include MACRO\n", Out));
+  EXPECT_STREQ("#include MACRO\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) {
@@ -507,8 +548,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
   SmallVector<char, 128> Out;
 
   ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
-  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
-  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
@@ -559,7 +601,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) {
                                                     "#define GUARD\n"
                                                     "#endif\n",
                                                     Out));
-  EXPECT_STREQ("#ifndef GUARD\n"
+  EXPECT_STREQ("#if\\\n"
+               "ndef GUARD\n"
                "#define GUARD\n"
                "#endif\n",
                Out.data());
@@ -567,12 +610,16 @@ TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) {
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
                                                     "RD\n",
                                                     Out));
-  EXPECT_STREQ("#define GUARD\n", Out.data());
+  EXPECT_STREQ("#define GUA\\\n"
+               "RD\n",
+               Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r"
                                                     "RD\n",
                                                     Out));
-  EXPECT_STREQ("#define GUARD\n", Out.data());
+  EXPECT_STREQ("#define GUA\\\r"
+               "RD\n",
+               Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
                                                     "           RD\n",
@@ -588,7 +635,10 @@ TEST(MinimizeSourceToDependencyDirectivesTest,
                                                     "2 + \\\t\n"
                                                     "3\n",
                                                     Out));
-  EXPECT_STREQ("#define A 1 + 2 + 3\n", Out.data());
+  EXPECT_STREQ("#define A 1+\\  \n"
+               "2+\\\t\n"
+               "3\n",
+               Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) {
@@ -682,6 +732,7 @@ int z = 128'78;
 
 TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
   SmallVector<char, 128> Out;
+  SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
 
   StringRef Source = R"(// comment
@@ -689,7 +740,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
 // another comment
 #include <test.h>
 )";
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
   EXPECT_STREQ("#pragma once\n#include <test.h>\n", Out.data());
   ASSERT_EQ(Directives.size(), 3u);
   EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once);
@@ -700,7 +752,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
     #include <test.h>
     )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#pragma once\n#include <test.h>\n", Out.data());
+  EXPECT_STREQ("#pragma once extra tokens\n#include <test.h>\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest,
@@ -755,11 +807,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest,
 
   Source = "#define X \"\\ \r\nx\n#include <x>\n";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X \"\\ \r\nx\n#include <x>\n", Out.data());
+  EXPECT_STREQ("#define X\"\\ \r\nx\n#include <x>\n", Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) {
   SmallVector<char, 128> Out;
+  SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
 
   StringRef Source = R"(
@@ -789,81 +842,17 @@ ort \
       import f(->a = 3);
     }
     )";
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
-  EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n"
-               "export import :l [[rename]];\n"
-               "import <<= 3;\nimport a b d e d e f e;\n"
-               "import foo [[no_unique_address]];\nimport foo();\n"
-               "import f(:sefse);\nimport f(->a = 3);\n",
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+  EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
+               "exp\\\nort import:l[[rename]];"
+               "import<<=3;import a b d e d e f e;"
+               "import foo[[no_unique_address]];import foo();"
+               "import f(:sefse);import f(->a=3);\n",
                Out.data());
-  ASSERT_EQ(Directives.size(), 12u);
-  EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_include);
-  EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::cxx_module_decl);
-}
-
-TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) {
-  SmallString<128> Out;
-  SmallVector<Directive, 32> Directives;
-  StringRef Source = "#ifndef GUARD\n"
-                     "#define GUARD\n"
-                     "void foo();\n"
-                     "#endif\n";
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
-  SmallVector<SkippedRange, 4> Ranges;
-  ASSERT_FALSE(computeSkippedRanges(Directives, Ranges));
-  EXPECT_EQ(Ranges.size(), 1u);
-  EXPECT_EQ(Ranges[0].Offset, 0);
-  EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif"));
-}
-
-TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) {
-  SmallString<128> Out;
-  SmallVector<Directive, 32> Directives;
-  StringRef Source = "#ifdef BLAH\n"
-                     "void skip();\n"
-                     "#elifdef BLAM\n"
-                     "void skip();\n"
-                     "#elifndef GUARD\n"
-                     "#define GUARD\n"
-                     "void foo();\n"
-                     "#endif\n";
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
-  SmallVector<SkippedRange, 4> Ranges;
-  ASSERT_FALSE(computeSkippedRanges(Directives, Ranges));
-  EXPECT_EQ(Ranges.size(), 3u);
-  EXPECT_EQ(Ranges[0].Offset, 0);
-  EXPECT_EQ(Ranges[0].Length, (int)Out.find("#elifdef"));
-  EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elifdef"));
-  EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#elifndef"));
-  EXPECT_EQ(Ranges[2].Offset, (int)Out.find("#elifndef"));
-  EXPECT_EQ(Ranges[2].Offset + Ranges[2].Length, (int)Out.rfind("#endif"));
-}
-
-TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) {
-  SmallString<128> Out;
-  SmallVector<Directive, 32> Directives;
-  StringRef Source = "#ifndef GUARD\n"
-                     "#define GUARD\n"
-                     "#if FOO\n"
-                     "#include hello\n"
-                     "#elif BAR\n"
-                     "#include bye\n"
-                     "#endif\n"
-                     "#else\n"
-                     "#include nothing\n"
-                     "#endif\n";
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
-  SmallVector<SkippedRange, 4> Ranges;
-  ASSERT_FALSE(computeSkippedRanges(Directives, Ranges));
-  EXPECT_EQ(Ranges.size(), 4u);
-  EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO"));
-  EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif"));
-  EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elif BAR"));
-  EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#endif"));
-  EXPECT_EQ(Ranges[2].Offset, 0);
-  EXPECT_EQ(Ranges[2].Length, (int)Out.find("#else"));
-  EXPECT_EQ(Ranges[3].Offset, (int)Out.find("#else"));
-  EXPECT_EQ(Ranges[3].Offset + Ranges[3].Length, (int)Out.rfind("#endif"));
+  ASSERT_EQ(Directives.size(), 10u);
+  EXPECT_EQ(Directives[0].Kind, pp_include);
+  EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl);
 }
 
 } // end anonymous namespace

diff  --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp
index bb819d9c59ec1..a9d6e6e7fb6fd 100644
--- a/clang/unittests/Tooling/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -204,53 +204,5 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerHasInclude) {
   EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h");
 }
 
-namespace dependencies {
-TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) {
-  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-  VFS->addFile("/mod.h", 0,
-               llvm::MemoryBuffer::getMemBuffer("#include <foo.h>\n"
-                                                "// hi there!\n"));
-
-  DependencyScanningFilesystemSharedCache SharedCache;
-  ExcludedPreprocessorDirectiveSkipMapping Mappings;
-  DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings);
-
-  DepFS.enableDirectivesScanningOfAllFiles(); // Let's be explicit for clarity.
-  auto StatusMinimized0 = DepFS.status("/mod.h");
-  DepFS.disableDirectivesScanning("/mod.h");
-  auto StatusFull1 = DepFS.status("/mod.h");
-
-  EXPECT_TRUE(StatusMinimized0);
-  EXPECT_TRUE(StatusFull1);
-  EXPECT_EQ(StatusMinimized0->getSize(), 17u);
-  EXPECT_EQ(StatusFull1->getSize(), 30u);
-  EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h"));
-  EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h"));
-}
-
-TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
-  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-  VFS->addFile("/mod.h", 0,
-               llvm::MemoryBuffer::getMemBuffer("#include <foo.h>\n"
-                                                "// hi there!\n"));
-
-  DependencyScanningFilesystemSharedCache SharedCache;
-  ExcludedPreprocessorDirectiveSkipMapping Mappings;
-  DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings);
-
-  DepFS.disableDirectivesScanning("/mod.h");
-  auto StatusFull0 = DepFS.status("/mod.h");
-  DepFS.enableDirectivesScanningOfAllFiles();
-  auto StatusMinimized1 = DepFS.status("/mod.h");
-
-  EXPECT_TRUE(StatusFull0);
-  EXPECT_TRUE(StatusMinimized1);
-  EXPECT_EQ(StatusFull0->getSize(), 30u);
-  EXPECT_EQ(StatusMinimized1->getSize(), 17u);
-  EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h"));
-  EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h"));
-}
-
-} // end namespace dependencies
 } // end namespace tooling
 } // end namespace clang


        


More information about the cfe-commits mailing list