[clang] [WIP][Safe Buffers] Serialize unsafe_buffer_usage pragmas (PR #92031)

Ziqing Luo via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 4 14:46:35 PDT 2024


https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 <ziqing_luo at apple.com>
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/4] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h        |  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp                | 106 ++++++++++++++----
 clang/lib/Serialization/ASTReader.cpp         |  11 ++
 clang/lib/Serialization/ASTWriter.cpp         |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h    |  20 ++++
 .../Modules/Inputs/SafeBuffers/test_sub2.h    |  11 ++
 clang/test/Modules/safe_buffers_optout.cpp    |  39 +++++++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 ++++++++++++
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +++++
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+      SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap<FileID, SafeBufferOptOutMapTy> LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///          opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+      const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-                                           const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-      SafeBufferOptOutMap,
-      [&SourceMgr,
-       &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
-        return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
-      });
+                                      const SourceLocation &Loc) const {
+  // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
+  // region map:
+  auto TestInMap = [&SourceMgr](const SafeBufferOptOutMapTy &Map,
+                                const SourceLocation &Loc) -> bool {
+    // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
+    auto FirstRegionEndingAfterLoc = llvm::partition_point(
+        Map, [&SourceMgr,
+              &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
+          return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
+        });
+
+    if (FirstRegionEndingAfterLoc != Map.end()) {
+      // To test if the start location of the found region precedes `Loc`:
+      return SourceMgr.isBeforeInTranslationUnit(
+          FirstRegionEndingAfterLoc->first, Loc);
+    }
+    // If we do not find a region whose end location passes `Loc`, we want to
+    // check if the current region is still open:
+    if (!Map.empty() && Map.back().first == Map.back().second)
+      return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
+    return false;
+  };
 
-  if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
-    // To test if the start location of the found region precedes `Loc`:
-    return SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first,
-                                               Loc);
-  }
-  // If we do not find a region whose end location passes `Loc`, we want to
-  // check if the current region is still open:
-  if (!SafeBufferOptOutMap.empty() &&
-      SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
-    return SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first,
-                                               Loc);
+  if (SourceMgr.isLocalSourceLocation(Loc))
+    return TestInMap(SafeBufferOptOutMap, Loc);
+
+  // Expand macros (if any) until it gets to a file location:
+  SourceLocation FLoc = SourceMgr.getFileLoc(Loc);
+  FileID FlocFID = SourceMgr.getDecomposedLoc(FLoc).first;
+  auto LoadedMap = LoadedSafeBufferOptOutMap.find(FlocFID);
+
+  if (LoadedMap != LoadedSafeBufferOptOutMap.end())
+    return TestInMap(LoadedMap->getSecond(), Loc);
+  // FIXME: think out if this is unreachable?
   return false;
 }
 
@@ -1551,6 +1567,58 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
   return InSafeBufferOptOutRegion;
 }
 
+SmallVector<SourceLocation, 64>
+Preprocessor::serializeSafeBufferOptOutMap() const {
+  assert(!InSafeBufferOptOutRegion &&
+         "Attempt to serialize safe buffer opt-out regions before file being "
+         "completely preprocessed");
+
+  SmallVector<SourceLocation, 64> SrcSeq;
+
+  for (const auto &[begin, end] : SafeBufferOptOutMap) {
+    SrcSeq.push_back(begin);
+    SrcSeq.push_back(end);
+  }
+  // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
+  // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
+  // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
+  // It means that for each loading pch/module m, it just needs to load m's own
+  // `SafeBufferOptOutMap`.
+  return SrcSeq;
+}
+
+void Preprocessor::setDeserializedSafeBufferOptOutMap(
+    const SmallVectorImpl<SourceLocation> &SourceLocations) {
+  auto It = SourceLocations.begin();
+
+  assert(SourceLocations.size() % 2 == 0 &&
+         "ill-formed SourceLocation sequence");
+  while (It != SourceLocations.end()) {
+    SourceLocation begin = *It++;
+    SourceLocation end = *It++;
+    SourceLocation FileLoc = SourceMgr.getFileLoc(begin);
+    FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first;
+
+    if (FID.isInvalid()) {
+      // I suppose this should not happen:
+      assert(false && "Attempted to read a safe buffer opt-out region whose "
+                      "begin location is associated to an invalid File ID.");
+      break;
+    }
+    assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file");
+    // Here we assume that
+    // `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`.
+    // Though it may not hold in very rare and strange cases, i.e., a pair of
+    // opt-out pragams written in different files but are always used in a way
+    // that they match in a TU (otherwise PP will complaint).
+    // FIXME: PP should complaint about cross file safe buffer opt-out pragmas.
+    assert(FID == SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first &&
+           "Maybe a safe buffer opt-out region starts and ends at different "
+           "files.");
+    LoadedSafeBufferOptOutMap[FID].emplace_back(begin, end);
+  }
+}
+
 ModuleLoader::~ModuleLoader() = default;
 
 CommentHandler::~CommentHandler() = default;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 43b69045bb054..4e22b3e1187c0 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3586,6 +3586,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
+    case PP_UNSAFE_BUFFER_USAGE: {
+      if (!Record.empty()) {
+        SmallVector<SourceLocation, 64> SrcLocs;
+        unsigned Idx = 0;
+        while (Idx < Record.size())
+          SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
+        PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
+      }
+      break;
+    }
+
     case PP_CONDITIONAL_STACK:
       if (!Record.empty()) {
         unsigned Idx = 0, End = Record.size() - 1;
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 30195868ca996..3203173640d18 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -911,6 +911,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
   RECORD(PP_ASSUME_NONNULL_LOC);
+  RECORD(PP_UNSAFE_BUFFER_USAGE);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -2439,6 +2440,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
     Record.clear();
   }
 
+  // Write the safe buffer opt-out region map in PP
+  for (SourceLocation &S : PP.serializeSafeBufferOptOutMap())
+    AddSourceLocation(std::move(S), Record);
+  Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record);
+  Record.clear();
+
   // Enter the preprocessor block.
   Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);
 
diff --git a/clang/test/Modules/Inputs/SafeBuffers/base.h b/clang/test/Modules/Inputs/SafeBuffers/base.h
new file mode 100644
index 0000000000000..660df17998a48
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/base.h
@@ -0,0 +1,9 @@
+#ifdef __cplusplus
+int base(int *p) {
+  int x = p[5];
+#pragma clang unsafe_buffer_usage begin
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
new file mode 100644
index 0000000000000..a4826c6514b98
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
@@ -0,0 +1,10 @@
+module safe_buffers_test_base {
+  header "base.h"
+}
+
+module safe_buffers_test_optout {
+  explicit module test_sub1 {  header "test_sub1.h" }
+  explicit module test_sub2 {  textual header "test_sub2.h" }
+  use safe_buffers_test_base
+}
+
diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h b/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
new file mode 100644
index 0000000000000..c2b105c6e748a
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
@@ -0,0 +1,20 @@
+#include "base.h"
+
+#ifdef __cplusplus
+int sub1(int *p) {
+  int x = p[5];
+#pragma clang unsafe_buffer_usage begin  
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+
+template <typename T>
+T sub1_T(T *p) {
+  T x = p[5];
+#pragma clang unsafe_buffer_usage begin  
+  T y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h b/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
new file mode 100644
index 0000000000000..11eb9591ab600
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
@@ -0,0 +1,11 @@
+#include "base.h"
+
+#ifdef __cplusplus
+int sub2(int *p) {  
+  int x = p[5];
+#pragma clang unsafe_buffer_usage begin
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/safe_buffers_optout.cpp b/clang/test/Modules/safe_buffers_optout.cpp
new file mode 100644
index 0000000000000..dd8b1cca5ec89
--- /dev/null
+++ b/clang/test/Modules/safe_buffers_optout.cpp
@@ -0,0 +1,39 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_base -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN:     -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_optout -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN:     -o %t/safe_buffers_test_optout.pcm -fmodule-file=%t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\
+// RUN:     -std=c++20 -Wunsafe-buffer-usage
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=safe_buffers_test_base -x c++\
+// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=safe_buffers_test_optout -x c++\
+// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs/SafeBuffers %s -x c++\
+// RUN:     -std=c++20 -Wunsafe-buffer-usage
+
+#include "test_sub1.h"
+#include "test_sub2.h"
+
+// Testing safe buffers opt-out region serialization with modules: this
+// file loads 2 submodules from top-level module
+// `safe_buffers_test_optout`, which uses another top-level module
+// `safe_buffers_test_base`. (So the module dependencies form a DAG.)
+
+// expected-warning at base.h:3{{unsafe buffer access}}
+// expected-note at base.h:3{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning at test_sub1.h:5{{unsafe buffer access}}
+// expected-note at test_sub1.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning at test_sub1.h:14{{unsafe buffer access}}
+// expected-note at test_sub1.h:14{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning at test_sub2.h:5{{unsafe buffer access}}
+// expected-note at test_sub2.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+int foo(int * p) {
+  int x = p[5]; // expected-warning{{unsafe buffer access}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+#pragma clang unsafe_buffer_usage begin
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  sub1_T(p); // instantiate template
+  return base(p) + sub1(p) + sub2(p);
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
new file mode 100644
index 0000000000000..04ac77b3f342e
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
@@ -0,0 +1,72 @@
+// A more complex example than warn-unsafe-buffer-usage-pragma-pch.cpp:
+//   MAIN - includes INC_H_1 
+//        \ loads    PCH_H_1 - includes INC_H_2
+//                           \ loads    PCH_H_2   
+
+// Test with PCH
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t-1 -DPCH_H_2 %s
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -include-pch %t-1 -emit-pch -o %t-2 -DPCH_H_1 %s
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t-2 -DMAIN -verify %s
+
+#define UNSAFE_BEGIN _Pragma("clang unsafe_buffer_usage begin")
+#define UNSAFE_END   _Pragma("clang unsafe_buffer_usage end")
+
+
+#ifdef INC_H_1
+#undef INC_H_1
+int a(int *s) {
+  s[2];  // <- expected warning here
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+#endif
+
+#ifdef INC_H_2
+#undef INC_H_2
+int b(int *s) {
+  s[2];  // <- expected warning here  
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+#endif
+
+#ifdef PCH_H_1
+#undef PCH_H_1
+#define INC_H_2
+#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp"
+
+int c(int *s) {
+  s[2];  // <- expected warning here  
+UNSAFE_BEGIN
+  return s[1];
+UNSAFE_END
+}
+#endif
+
+#ifdef PCH_H_2
+#undef PCH_H_2
+int d(int *s) {
+  s[2];  // <- expected warning here  
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+#endif
+
+#ifdef MAIN
+#undef MAIN
+#define INC_H_1
+#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp"
+
+// expected-warning at -45{{unsafe buffer access}} expected-note at -45{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning at -36{{unsafe buffer access}} expected-note at -36{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning at -24{{unsafe buffer access}} expected-note at -24{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning at -15{{unsafe buffer access}} expected-note at -15{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+int main() {
+  int s[] = {1, 2, 3};
+  return a(s) + b(s) + c(s) + d(s);
+}
+#undef MAIN
+#endif
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp
new file mode 100644
index 0000000000000..abd3f0ffe9565
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp
@@ -0,0 +1,27 @@
+// The original example from https://github.com/llvm/llvm-project/issues/90501
+
+// Test without PCH
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include %s -verify %s
+// Test with PCH
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t -verify %s
+
+#ifndef A_H
+#define A_H
+
+int a(int *s) {
+  s[2];  // <- expected warning here
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+
+#else
+// expected-warning at -7{{unsafe buffer access}}
+// expected-note at -8{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+int main() {
+  int s[] = {1, 2, 3};
+  return a(s);
+}
+
+#endif

>From 23050bcd41a6ebf7dce02be233e0a2d687f3f98d Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 15 May 2024 14:31:37 -0700
Subject: [PATCH 2/4] fix the test command for lazily loading modules

---
 clang/test/Modules/safe_buffers_optout.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/clang/test/Modules/safe_buffers_optout.cpp b/clang/test/Modules/safe_buffers_optout.cpp
index dd8b1cca5ec89..09f42f7683e2b 100644
--- a/clang/test/Modules/safe_buffers_optout.cpp
+++ b/clang/test/Modules/safe_buffers_optout.cpp
@@ -6,12 +6,8 @@
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\
 // RUN:     -std=c++20 -Wunsafe-buffer-usage
 
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=safe_buffers_test_base -x c++\
-// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 -Wunsafe-buffer-usage
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=safe_buffers_test_optout -x c++\
-// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 -Wunsafe-buffer-usage
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs/SafeBuffers %s -x c++\
-// RUN:     -std=c++20 -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -I %S/Inputs/SafeBuffers %s\
+// RUN:     -x c++ -std=c++20 -Wunsafe-buffer-usage
 
 #include "test_sub1.h"
 #include "test_sub2.h"

>From 64b1f95cc535f284e993c395d6d258ac8d83cfe4 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 16 May 2024 15:37:11 -0700
Subject: [PATCH 3/4] De-serialize an unwelcomed opt-out region is an error

Throw an error if clang deserializes a safe buffer opt-out region that
begins and ends at different files.
---
 clang/include/clang/Lex/Preprocessor.h        |  5 +++-
 clang/lib/Lex/Preprocessor.cpp                | 27 ++++++++++---------
 clang/lib/Serialization/ASTReader.cpp         |  4 ++-
 .../unsafe-buffer-usage-pragma-pch-bad.cpp    | 25 +++++++++++++++++
 4 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 8d6884ebe7597..e49a7c5847684 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2929,7 +2929,10 @@ class Preprocessor {
 
   /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
   /// record of code `PP_UNSAFE_BUFFER_USAGE`.
-  void setDeserializedSafeBufferOptOutMap(
+  /// \return `std::nullopt` iff deserialization succeeds and this PP has been
+  /// properly set; Otherwise, a StringRef carrying the message of why
+  /// serialization fails with this PP being untouched.
+  std::optional<StringRef> setDeserializedSafeBufferOptOutMap(
       const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
 
 private:
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 6a41e3d4138aa..42e314e25854a 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1587,9 +1587,11 @@ Preprocessor::serializeSafeBufferOptOutMap() const {
   return SrcSeq;
 }
 
-void Preprocessor::setDeserializedSafeBufferOptOutMap(
+std::optional<StringRef> Preprocessor::setDeserializedSafeBufferOptOutMap(
     const SmallVectorImpl<SourceLocation> &SourceLocations) {
   auto It = SourceLocations.begin();
+  decltype(LoadedSafeBufferOptOutMap)
+      TmpMap; // temporarily hold data for `LoadedSafeBufferOptOutMap`
 
   assert(SourceLocations.size() % 2 == 0 &&
          "ill-formed SourceLocation sequence");
@@ -1599,24 +1601,25 @@ void Preprocessor::setDeserializedSafeBufferOptOutMap(
     SourceLocation FileLoc = SourceMgr.getFileLoc(begin);
     FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first;
 
-    if (FID.isInvalid()) {
-      // I suppose this should not happen:
-      assert(false && "Attempted to read a safe buffer opt-out region whose "
-                      "begin location is associated to an invalid File ID.");
-      break;
-    }
+    assert(!FID.isInvalid() &&
+           "Attempted to read a safe buffer opt-out region whose "
+           "begin location is associated to an invalid File ID.");
     assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file");
-    // Here we assume that
+    // Here we require that
     // `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`.
     // Though it may not hold in very rare and strange cases, i.e., a pair of
     // opt-out pragams written in different files but are always used in a way
     // that they match in a TU (otherwise PP will complaint).
     // FIXME: PP should complaint about cross file safe buffer opt-out pragmas.
-    assert(FID == SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first &&
-           "Maybe a safe buffer opt-out region starts and ends at different "
-           "files.");
-    LoadedSafeBufferOptOutMap[FID].emplace_back(begin, end);
+    if (FID != SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first) {
+      return "Cannot de-serialize a safe buffer opt-out region that begins and "
+             "ends at different files";
+    }
+    TmpMap[FID].emplace_back(begin, end);
   }
+  for (auto [K, V] : TmpMap)
+    LoadedSafeBufferOptOutMap[K].append(V);
+  return std::nullopt;
 }
 
 ModuleLoader::~ModuleLoader() = default;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 4e22b3e1187c0..f1c3fe0fe456a 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3592,7 +3592,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         unsigned Idx = 0;
         while (Idx < Record.size())
           SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
-        PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
+        if (auto ErrMsg = PP.setDeserializedSafeBufferOptOutMap(SrcLocs))
+          return llvm::createStringError(std::errc::invalid_argument,
+                                         ErrMsg->data());
       }
       break;
     }
diff --git a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp
new file mode 100644
index 0000000000000..7713fb47cc55f
--- /dev/null
+++ b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp
@@ -0,0 +1,25 @@
+// Test that a serialized safe buffer opt-out region begins and ends at different files.
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s -include %s
+// RUN: not %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t %s 2>&1 | FileCheck %s
+
+// CHECK: fatal error:{{.*}}'Cannot de-serialize a safe buffer opt-out region that begins and ends at different files'
+#ifndef A_H
+#define A_H
+
+int a() {
+#pragma clang unsafe_buffer_usage begin
+  return 0;
+}
+
+#elif (defined(A_H) && !defined(B_H))
+#define B_H
+
+#pragma clang unsafe_buffer_usage end
+
+#else
+
+int main() {
+  return a();
+}
+
+#endif

>From 72805ea09f6bc5daeb4978d8af8b9731cdfd927c Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Tue, 4 Jun 2024 14:33:46 -0700
Subject: [PATCH 4/4] Support multi-file safe buffer opt-out regions in PCHs

Figured out how to identify a loaded AST from a source location.  With
that, we can eliminate the restriction that a de-serialized safe
buffer opt-out region cannot span over multiple files. (Of course
those files need to be in the same TU.)
---
 clang/include/clang/Basic/SourceManager.h     |  7 +++
 clang/include/clang/Lex/Preprocessor.h        | 50 ++++++++++++----
 clang/lib/Basic/SourceManager.cpp             | 14 +++++
 clang/lib/Lex/Preprocessor.cpp                | 59 +++++++------------
 clang/lib/Serialization/ASTReader.cpp         |  4 +-
 .../SafeBuffers/safe_buffers_test.modulemap   |  5 ++
 .../Modules/Inputs/SafeBuffers/test_module.h  |  7 +++
 clang/test/Modules/safe_buffers_optout.cpp    |  8 ++-
 .../test/PCH/UnsafeBufferPragma/header-2.hpp  | 19 ++++++
 clang/test/PCH/UnsafeBufferPragma/header.hpp  | 24 ++++++++
 ...nsafe-buffer-usage-pragma-pch-complex.cpp} |  7 +--
 ...-buffer-usage-pragma-pch-cross-files-2.cpp |  9 +++
 ...fe-buffer-usage-pragma-pch-cross-files.cpp |  8 +++
 .../unsafe-buffer-usage-pragma-pch-bad.cpp    | 25 --------
 14 files changed, 166 insertions(+), 80 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_module.h
 create mode 100644 clang/test/PCH/UnsafeBufferPragma/header-2.hpp
 create mode 100644 clang/test/PCH/UnsafeBufferPragma/header.hpp
 rename clang/test/{SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp => PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-complex.cpp} (90%)
 create mode 100644 clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp
 create mode 100644 clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files.cpp
 delete mode 100644 clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index d2ece14da0b11..359dd2cfe41e0 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -1672,6 +1672,13 @@ class SourceManager : public RefCountedBase<SourceManager> {
   isInTheSameTranslationUnit(std::pair<FileID, unsigned> &LOffs,
                              std::pair<FileID, unsigned> &ROffs) const;
 
+  /// \param Loc a source location in a loaded AST (of a PCH/Module file).
+  /// \returns the first FileID of the loaded AST where `Loc` belongs to.  One
+  /// can use the returned FileID to check if two source locations are from the
+  /// same loaded AST.  See `LoadedSLocEntryAllocBegin`.  The behavior is
+  /// undefined if `isLoadedSourceLocation(Loc)` is NOT true.
+  FileID getFirstFIDOfLoadedAST(SourceLocation Loc) const;
+
   /// Determines whether the two decomposed source location is in the same TU.
   bool isInTheSameTranslationUnitImpl(
       const std::pair<FileID, unsigned> &LOffs,
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index e49a7c5847684..bd4cfa12c7cce 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,15 +2883,46 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.
 
-  using SafeBufferOptOutMapTy =
+  using SafeBufferOptOutRegionsTy =
       SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
   // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
   // translation unit. Each region is represented by a pair of start and
-  // end locations.  A region is "open" if its' start and end locations are
-  // identical.
-  SafeBufferOptOutMapTy SafeBufferOptOutMap;
-  // `SafeBufferOptOutMap`s of loaded files:
-  llvm::DenseMap<FileID, SafeBufferOptOutMapTy> LoadedSafeBufferOptOutMap;
+  // end locations.
+  SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
+  // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs.  We use the
+  // following structure to manage them by their ASTs.
+  struct {
+    // The first FileID of a loaded AST can be used to represent the
+    // loaded AST.  (By the exact words on
+    // `SourceManager::LoadedSLocEntryAllocBegin`, first FileIDs can be used
+    // to tell if they are from the same loaded AST.)
+    //
+    // The map below is from such FileIDs to the opt-out regions associated to
+    // their loaded ASTs.
+    llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> FirstFID2Regions;
+
+    // Returns a reference to the safe buffer opt-out regions of the loaded
+    // AST where `Loc` belongs to. (Construct if absent)
+    SafeBufferOptOutRegionsTy &
+    findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
+      FileID FID = SrcMgr.getFirstFIDOfLoadedAST(Loc);
+      return FirstFID2Regions.FindAndConstruct(FID).second;
+    }
+
+    // Returns a reference to the safe buffer opt-out regions of the loaded
+    // AST where `Loc` belongs to. (This const function returns nullptr if
+    // absent.)
+    const SafeBufferOptOutRegionsTy *
+    lookupLoadedOptOutMap(SourceLocation Loc,
+                          const SourceManager &SrcMgr) const {
+      FileID FID = SrcMgr.getFirstFIDOfLoadedAST(Loc);
+      auto Iter = FirstFID2Regions.find_as(FID);
+
+      if (Iter == FirstFID2Regions.end())
+        return nullptr;
+      return &Iter->getSecond();
+    }
+  } LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2929,10 +2960,9 @@ class Preprocessor {
 
   /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
   /// record of code `PP_UNSAFE_BUFFER_USAGE`.
-  /// \return `std::nullopt` iff deserialization succeeds and this PP has been
-  /// properly set; Otherwise, a StringRef carrying the message of why
-  /// serialization fails with this PP being untouched.
-  std::optional<StringRef> setDeserializedSafeBufferOptOutMap(
+  /// \return true iff the `Preprocessor` has been updated; false `Preprocessor`
+  /// is same as itself before the call.
+  bool setDeserializedSafeBufferOptOutMap(
       const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
 
 private:
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 37734d3b10e78..43634449b7318 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -1911,6 +1911,20 @@ SourceManager::getDecomposedIncludedLoc(FileID FID) const {
   return DecompLoc;
 }
 
+FileID SourceManager::getFirstFIDOfLoadedAST(SourceLocation Loc) const {
+  assert(isLoadedSourceLocation(Loc) &&
+         "Must be a source location in a loaded PCH/Module file");
+
+  auto [FID, Ignore] = getDecomposedLoc(Loc);
+  const FileID *FirstFID =
+      llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater<FileID>{});
+
+  assert(FirstFID &&
+         "The failure to find the first FileID of a "
+         "loaded AST from a loaded source location was unexpected.");
+  return *FirstFID;
+}
+
 bool SourceManager::isInTheSameTranslationUnitImpl(
     const std::pair<FileID, unsigned> &LOffs,
     const std::pair<FileID, unsigned> &ROffs) const {
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 42e314e25854a..414a65223339c 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1487,7 +1487,7 @@ bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
                                       const SourceLocation &Loc) const {
   // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
   // region map:
-  auto TestInMap = [&SourceMgr](const SafeBufferOptOutMapTy &Map,
+  auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
                                 const SourceLocation &Loc) -> bool {
     // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
     auto FirstRegionEndingAfterLoc = llvm::partition_point(
@@ -1511,14 +1511,13 @@ bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
   if (SourceMgr.isLocalSourceLocation(Loc))
     return TestInMap(SafeBufferOptOutMap, Loc);
 
-  // Expand macros (if any) until it gets to a file location:
-  SourceLocation FLoc = SourceMgr.getFileLoc(Loc);
-  FileID FlocFID = SourceMgr.getDecomposedLoc(FLoc).first;
-  auto LoadedMap = LoadedSafeBufferOptOutMap.find(FlocFID);
+  // If Loc is from a loaded AST:
+  const SafeBufferOptOutRegionsTy *LoadedRegions =
+      LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
 
-  if (LoadedMap != LoadedSafeBufferOptOutMap.end())
-    return TestInMap(LoadedMap->getSecond(), Loc);
-  // FIXME: think out if this is unreachable?
+  if (LoadedRegions)
+    return TestInMap(*LoadedRegions, Loc);
+  // The loaded AST has no opt-out region:
   return false;
 }
 
@@ -1587,39 +1586,25 @@ Preprocessor::serializeSafeBufferOptOutMap() const {
   return SrcSeq;
 }
 
-std::optional<StringRef> Preprocessor::setDeserializedSafeBufferOptOutMap(
+bool Preprocessor::setDeserializedSafeBufferOptOutMap(
     const SmallVectorImpl<SourceLocation> &SourceLocations) {
-  auto It = SourceLocations.begin();
-  decltype(LoadedSafeBufferOptOutMap)
-      TmpMap; // temporarily hold data for `LoadedSafeBufferOptOutMap`
+  if (SourceLocations.size() == 0)
+    return false;
 
   assert(SourceLocations.size() % 2 == 0 &&
          "ill-formed SourceLocation sequence");
-  while (It != SourceLocations.end()) {
-    SourceLocation begin = *It++;
-    SourceLocation end = *It++;
-    SourceLocation FileLoc = SourceMgr.getFileLoc(begin);
-    FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first;
-
-    assert(!FID.isInvalid() &&
-           "Attempted to read a safe buffer opt-out region whose "
-           "begin location is associated to an invalid File ID.");
-    assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file");
-    // Here we require that
-    // `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`.
-    // Though it may not hold in very rare and strange cases, i.e., a pair of
-    // opt-out pragams written in different files but are always used in a way
-    // that they match in a TU (otherwise PP will complaint).
-    // FIXME: PP should complaint about cross file safe buffer opt-out pragmas.
-    if (FID != SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first) {
-      return "Cannot de-serialize a safe buffer opt-out region that begins and "
-             "ends at different files";
-    }
-    TmpMap[FID].emplace_back(begin, end);
-  }
-  for (auto [K, V] : TmpMap)
-    LoadedSafeBufferOptOutMap[K].append(V);
-  return std::nullopt;
+
+  auto It = SourceLocations.begin();
+  SafeBufferOptOutRegionsTy &Regions =
+      LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
+
+  do {
+    SourceLocation Begin = *It++;
+    SourceLocation End = *It++;
+
+    Regions.emplace_back(Begin, End);
+  } while (It != SourceLocations.end());
+  return true;
 }
 
 ModuleLoader::~ModuleLoader() = default;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index f1c3fe0fe456a..4e22b3e1187c0 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3592,9 +3592,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         unsigned Idx = 0;
         while (Idx < Record.size())
           SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
-        if (auto ErrMsg = PP.setDeserializedSafeBufferOptOutMap(SrcLocs))
-          return llvm::createStringError(std::errc::invalid_argument,
-                                         ErrMsg->data());
+        PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
       }
       break;
     }
diff --git a/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
index a4826c6514b98..3a17cccbfe7a6 100644
--- a/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
+++ b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
@@ -2,9 +2,14 @@ module safe_buffers_test_base {
   header "base.h"
 }
 
+module safe_buffers_test_module {
+ textual header "test_module.h"
+}
+
 module safe_buffers_test_optout {
   explicit module test_sub1 {  header "test_sub1.h" }
   explicit module test_sub2 {  textual header "test_sub2.h" }
   use safe_buffers_test_base
+  use safe_buffers_test_module
 }
 
diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_module.h b/clang/test/Modules/Inputs/SafeBuffers/test_module.h
new file mode 100644
index 0000000000000..a1c93081579f5
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/test_module.h
@@ -0,0 +1,7 @@
+#ifdef __cplusplus
+int test_module(int *p) {
+  int x = p[5];
+  int y = p[5];
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/safe_buffers_optout.cpp b/clang/test/Modules/safe_buffers_optout.cpp
index 09f42f7683e2b..0fe0636b83b22 100644
--- a/clang/test/Modules/safe_buffers_optout.cpp
+++ b/clang/test/Modules/safe_buffers_optout.cpp
@@ -1,8 +1,10 @@
 // RUN: rm -rf %t
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_base -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
 // RUN:     -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_module -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN:     -o %t/safe_buffers_test_module.pcm
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_optout -x c++ %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
-// RUN:     -o %t/safe_buffers_test_optout.pcm -fmodule-file=%t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN:     -o %t/safe_buffers_test_optout.pcm -fmodule-file=%t/safe_buffers_test_base.pcm -fmodule-file=%t/safe_buffers_test_module.pcm -Wunsafe-buffer-usage
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\
 // RUN:     -std=c++20 -Wunsafe-buffer-usage
 
@@ -33,3 +35,7 @@ int foo(int * p) {
   sub1_T(p); // instantiate template
   return base(p) + sub1(p) + sub2(p);
 }
+
+#pragma clang unsafe_buffer_usage begin
+#include "test_module.h"         // This module header is textually included (i.e., it is in the same TU as %s), so warnings are suppressed
+#pragma clang unsafe_buffer_usage end
diff --git a/clang/test/PCH/UnsafeBufferPragma/header-2.hpp b/clang/test/PCH/UnsafeBufferPragma/header-2.hpp
new file mode 100644
index 0000000000000..c0cc2834d7b7e
--- /dev/null
+++ b/clang/test/PCH/UnsafeBufferPragma/header-2.hpp
@@ -0,0 +1,19 @@
+#ifndef UNSAFE_HEADER
+#define UNSAFE_HEADER
+
+int foo(int *p) {
+  return p[5];  // This will be warned
+}
+
+_Pragma("clang unsafe_buffer_usage begin") // The opt-out region spans over two files of one TU
+#include "header-2.hpp"
+
+#else
+
+int bar(int *p) {
+  return p[5];
+}
+_Pragma("clang unsafe_buffer_usage end")
+#endif
+
+
diff --git a/clang/test/PCH/UnsafeBufferPragma/header.hpp b/clang/test/PCH/UnsafeBufferPragma/header.hpp
new file mode 100644
index 0000000000000..6cda502d80e61
--- /dev/null
+++ b/clang/test/PCH/UnsafeBufferPragma/header.hpp
@@ -0,0 +1,24 @@
+#ifndef UNSAFE_HEADER
+#define UNSAFE_HEADER
+//This is a PCH file:
+
+int foo(int *p) {
+  return p[5];  // This will be warned
+}
+
+_Pragma("clang unsafe_buffer_usage begin")
+#include "header.hpp"
+_Pragma("clang unsafe_buffer_usage end")
+
+#else
+// This part is included by the PCH in the traditional way.  The
+// include directive in the PCH is enclosed in an opt-out region, so
+// unsafe operations here is suppressed.
+
+int bar(int *p) {
+  return p[5];
+}
+
+#endif
+
+
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp b/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-complex.cpp
similarity index 90%
rename from clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
rename to clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-complex.cpp
index 04ac77b3f342e..f33947f4ec613 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
+++ b/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-complex.cpp
@@ -1,9 +1,8 @@
-// A more complex example than warn-unsafe-buffer-usage-pragma-pch.cpp:
+// Test PCHs: 
 //   MAIN - includes INC_H_1 
 //        \ loads    PCH_H_1 - includes INC_H_2
 //                           \ loads    PCH_H_2   
 
-// Test with PCH
 // RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t-1 -DPCH_H_2 %s
 // RUN: %clang_cc1 -Wno-unused-value -std=c++20 -include-pch %t-1 -emit-pch -o %t-2 -DPCH_H_1 %s
 // RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t-2 -DMAIN -verify %s
@@ -35,7 +34,7 @@ int b(int *s) {
 #ifdef PCH_H_1
 #undef PCH_H_1
 #define INC_H_2
-#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp"
+#include "unsafe-buffer-usage-pragma-pch-complex.cpp"
 
 int c(int *s) {
   s[2];  // <- expected warning here  
@@ -58,7 +57,7 @@ int d(int *s) {
 #ifdef MAIN
 #undef MAIN
 #define INC_H_1
-#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp"
+#include "unsafe-buffer-usage-pragma-pch-complex.cpp"
 
 // expected-warning at -45{{unsafe buffer access}} expected-note at -45{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
 // expected-warning at -36{{unsafe buffer access}} expected-note at -36{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
diff --git a/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp b/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp
new file mode 100644
index 0000000000000..14ba9f7cf880e
--- /dev/null
+++ b/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp
@@ -0,0 +1,9 @@
+
+// Test with PCH: See `header-2.hpp` for what are being tested.
+//
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t.pch %S/header-2.hpp
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t.pch -verify %s
+// RUN: rm %t.pch
+
+// expected-warning at header-2.hpp:5 {{unsafe buffer access}}
+// expected-note at header-2.hpp:5 {{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
diff --git a/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files.cpp b/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files.cpp
new file mode 100644
index 0000000000000..8cc57470556c4
--- /dev/null
+++ b/clang/test/PCH/UnsafeBufferPragma/unsafe-buffer-usage-pragma-pch-cross-files.cpp
@@ -0,0 +1,8 @@
+// Test with PCH: See `header.hpp` for what are being tested.
+//
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t.pch %S/header.hpp
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t.pch -verify %s
+// RUN: rm %t.pch
+
+// expected-warning at header.hpp:6 {{unsafe buffer access}}
+// expected-note at header.hpp:6 {{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
diff --git a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp
deleted file mode 100644
index 7713fb47cc55f..0000000000000
--- a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// Test that a serialized safe buffer opt-out region begins and ends at different files.
-// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s -include %s
-// RUN: not %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t %s 2>&1 | FileCheck %s
-
-// CHECK: fatal error:{{.*}}'Cannot de-serialize a safe buffer opt-out region that begins and ends at different files'
-#ifndef A_H
-#define A_H
-
-int a() {
-#pragma clang unsafe_buffer_usage begin
-  return 0;
-}
-
-#elif (defined(A_H) && !defined(B_H))
-#define B_H
-
-#pragma clang unsafe_buffer_usage end
-
-#else
-
-int main() {
-  return a();
-}
-
-#endif



More information about the cfe-commits mailing list