[clang] [Clang] Fix an out of bound access in -verify comment parsing (PR #141940)
via cfe-commits
cfe-commits at lists.llvm.org
Thu May 29 09:38:23 PDT 2025
https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/141940
>From ca2ea7359ac2f0f0ea41396990847479b5a3e2b7 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 29 May 2025 15:14:55 +0200
Subject: [PATCH 1/2] [Clang] Fix an out of bound access in -verify comment
parsing
When the comment ends with a splice at EOF.
Fixes #141221
---
clang/lib/Frontend/VerifyDiagnosticConsumer.cpp | 2 +-
clang/test/Frontend/verify-gh141221.c | 6 ++++++
2 files changed, 7 insertions(+), 1 deletion(-)
create mode 100644 clang/test/Frontend/verify-gh141221.c
diff --git a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
index 89fda3e839cb9..eb241f34d095e 100644
--- a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -812,7 +812,7 @@ bool VerifyDiagnosticConsumer::HandleComment(Preprocessor &PP,
C2 += C.substr(last, loc-last);
last = loc + 1;
- if (C[last] == '\n' || C[last] == '\r') {
+ if (last < C.size() && (C[last] == '\n' || C[last] == '\r')) {
++last;
// Escape \r\n or \n\r, but not \n\n.
diff --git a/clang/test/Frontend/verify-gh141221.c b/clang/test/Frontend/verify-gh141221.c
new file mode 100644
index 0000000000000..eb0be46a3da8b
--- /dev/null
+++ b/clang/test/Frontend/verify-gh141221.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -verify %s
+
+// Check that we don't crash if the file ends in a splice
+// This file should *NOT* end with a new line
+a;
+// expected-error at -1 {{}} \
\ No newline at end of file
>From 10f354d150ccbb9c35885f5daba6158363df0e43 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 29 May 2025 18:28:57 +0200
Subject: [PATCH 2/2] [Clang] Improve infrastructure for libstdc++ workarounds
This introduces a way to detect the libstdc++ version,
and uses that to enable workarounds.
The version is cached.
This should make it easier in the future to find and remove
these hacks.
I did not find the need for enabling a hack between or after
specific versions, so it's left as a future exercise.
We can extend this feature to other libraries as the need arises.
---
clang/include/clang/Lex/Preprocessor.h | 15 +++++++
clang/lib/Lex/PPExpressions.cpp | 45 +++++++++++++++++++
clang/lib/Sema/SemaDeclCXX.cpp | 1 +
clang/lib/Sema/SemaExceptionSpec.cpp | 3 ++
clang/lib/Sema/SemaInit.cpp | 7 ++-
clang/lib/Sema/SemaTemplate.cpp | 25 ++---------
.../lib/Sema/SemaTemplateInstantiateDecl.cpp | 24 +++++-----
.../SemaCXX/libstdcxx_common_type_hack.cpp | 2 +-
.../libstdcxx_explicit_init_list_hack.cpp | 2 +-
.../test/SemaCXX/libstdcxx_pair_swap_hack.cpp | 18 ++++----
10 files changed, 96 insertions(+), 46 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index f2dfd3a349b8b..442d5dd6cb006 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -129,6 +129,12 @@ enum class EmbedResult {
Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__
};
+struct CXXStandardLibraryVersionInfo { // Which C++ standard library was detected, and its version number.
+ enum Library { Unknow, LibStdCXX }; // Unknow: the version macro was found but no integer could be read from it.
+ Library Lib; // Library that Version refers to.
+ unsigned Version; // Integer read from the library's version macro; 0 when Lib is Unknow.
+};
+
/// Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
@@ -2706,6 +2712,15 @@ class Preprocessor {
return IsFileLexer(CurLexer.get(), CurPPLexer);
}
+ //===--------------------------------------------------------------------===//
+ // Standard Library Identification
+ std::optional<CXXStandardLibraryVersionInfo> CXXStandardLibraryVersion;
+
+public:
+ std::optional<unsigned> getStdLibCxxVersion();
+ bool NeedsStdLibCxxWorkaroundBefore(unsigned FixedVersion);
+
+private:
//===--------------------------------------------------------------------===//
// Caching stuff.
void CachingLex(Token &Result);
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index cf7e32bee2e71..2a40b2c93a853 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -979,3 +979,48 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
return EvaluateDirectiveExpression(IfNDefMacro, Tok, EvaluatedDefined,
CheckForEoD);
}
+
+/// Reads a C++ standard library version number from the macro \p MacroName.
+///
+/// \param PP        Preprocessor whose macro table is queried.
+/// \param MacroName Name of the macro that carries the version number.
+/// \param Lib       Library to report when the macro yields a number.
+///
+/// \returns std::nullopt if the macro is not defined or its replacement list
+/// is not exactly one token; {Unknow, 0} if the token's spelling is invalid
+/// or does not start with a decimal integer; otherwise {\p Lib, value}.
+static std::optional<CXXStandardLibraryVersionInfo>
+getCXXStandardLibraryVersion(Preprocessor &PP, StringRef MacroName,
+                             CXXStandardLibraryVersionInfo::Library Lib) {
+  MacroInfo *Macro = PP.getMacroInfo(PP.getIdentifierInfo(MacroName));
+
+  if (!Macro || Macro->getNumTokens() != 1)
+    return std::nullopt;
+
+  const Token &RevisionDateTok = Macro->getReplacementToken(0);
+
+  bool Invalid = false;
+  llvm::SmallVector<char, 10> Buffer;
+  llvm::StringRef RevisionDate =
+      PP.getSpelling(RevisionDateTok, Buffer, &Invalid);
+  if (!Invalid) {
+    unsigned Value;
+    // We don't use NumericParser to avoid diagnostics.
+    // consumeInteger returns true on failure, so a false result means a
+    // leading decimal integer was stored in Value.
+    if (!RevisionDate.consumeInteger(10, Value))
+      // Report the library the caller asked about instead of hard-coding
+      // libstdc++, so the helper stays correct for other libraries.
+      return CXXStandardLibraryVersionInfo{Lib, Value};
+  }
+  return CXXStandardLibraryVersionInfo{CXXStandardLibraryVersionInfo::Unknow,
+                                       0};
+}
+
+/// Returns the libstdc++ version read from the __GLIBCXX__ macro, or
+/// std::nullopt when no libstdc++ version is available.
+///
+/// A result obtained from the macro is kept in CXXStandardLibraryVersion and
+/// reused by later calls. While the macro lookup yields nothing, the member
+/// stays empty and the next call performs the lookup again.
+std::optional<unsigned> Preprocessor::getStdLibCxxVersion() {
+  if (!CXXStandardLibraryVersion.has_value())
+    CXXStandardLibraryVersion = getCXXStandardLibraryVersion(
+        *this, "__GLIBCXX__", CXXStandardLibraryVersionInfo::LibStdCXX);
+
+  // Only a stored entry that identifies libstdc++ carries a usable version.
+  const bool IsLibStdCXX = CXXStandardLibraryVersion.has_value() &&
+                           CXXStandardLibraryVersion->Lib ==
+                               CXXStandardLibraryVersionInfo::LibStdCXX;
+  if (!IsLibStdCXX)
+    return std::nullopt;
+  return CXXStandardLibraryVersion->Version;
+}
+
+/// Returns true when a libstdc++ version is available and it is strictly
+/// lower than \p FixedVersion; returns false when no version is available.
+bool Preprocessor::NeedsStdLibCxxWorkaroundBefore(unsigned FixedVersion) {
+  if (std::optional<unsigned> Detected = getStdLibCxxVersion())
+    return *Detected < FixedVersion;
+  return false;
+}
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 4a735992cec68..8d8108ee33c11 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13198,6 +13198,7 @@ NamedDecl *Sema::BuildUsingDeclaration(
if (getLangOpts().CPlusPlus14 && II && II->isStr("gets") &&
CurContext->isStdNamespace() &&
isa<TranslationUnitDecl>(LookupContext) &&
+ PP.NeedsStdLibCxxWorkaroundBefore(20161221) &&
getSourceManager().isInSystemHeader(UsingLoc))
return nullptr;
UsingValidatorCCC CCC(HasTypenameKeyword, IsInstantiation, SS.getScopeRep(),
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index c83eab53891ca..c83bbaa0375a0 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaInternal.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <optional>
@@ -44,6 +45,8 @@ static const FunctionProtoType *GetUnderlyingFunction(QualType T)
bool Sema::isLibstdcxxEagerExceptionSpecHack(const Declarator &D) {
auto *RD = dyn_cast<CXXRecordDecl>(CurContext);
+ if (!getPreprocessor().NeedsStdLibCxxWorkaroundBefore(20160427))
+ return false;
// All the problem cases are member functions named "swap" within class
// templates declared directly within namespace std or std::__debug or
// std::__profile.
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 776cb022e6925..4f9fa40ea7575 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -21,6 +21,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Designator.h"
#include "clang/Sema/EnterExpressionEvaluationContext.h"
#include "clang/Sema/Initialization.h"
@@ -641,8 +642,10 @@ ExprResult InitListChecker::PerformEmptyInit(SourceLocation Loc,
// in that case. stlport does so too.
// Look for std::__debug for libstdc++, and for std:: for stlport.
// This is effectively a compiler-side implementation of LWG2193.
- if (!InitSeq && EmptyInitList && InitSeq.getFailureKind() ==
- InitializationSequence::FK_ExplicitConstructor) {
+ if (!InitSeq && EmptyInitList &&
+ InitSeq.getFailureKind() ==
+ InitializationSequence::FK_ExplicitConstructor &&
+ SemaRef.getPreprocessor().NeedsStdLibCxxWorkaroundBefore(20140422)) {
OverloadCandidateSet::iterator Best;
OverloadingResult O =
InitSeq.getFailedCandidateSet()
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 10e7823542f0b..6638cbda64a41 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4444,16 +4444,8 @@ static bool IsLibstdcxxStdFormatKind(Preprocessor &PP, VarDecl *Var) {
!Var->getDeclContext()->isStdNamespace())
return false;
- MacroInfo *MacroGLIBCXX =
- PP.getMacroInfo(PP.getIdentifierInfo("__GLIBCXX__"));
-
- if (!MacroGLIBCXX || MacroGLIBCXX->getNumTokens() != 1)
- return false;
-
- const Token &RevisionDateTok = MacroGLIBCXX->getReplacementToken(0);
- bool Invalid = false;
- std::string RevisionDate = PP.getSpelling(RevisionDateTok, &Invalid);
-
+ // Checking old versions of libstdc++ is not needed because 15.1 is the first
+ // release in which users can access std::format_kind.
// We can use 20250520 as the final date, see the following commits.
// GCC releases/gcc-15 branch:
// https://gcc.gnu.org/g:fedf81ef7b98e5c9ac899b8641bb670746c51205
@@ -4461,18 +4453,7 @@ static bool IsLibstdcxxStdFormatKind(Preprocessor &PP, VarDecl *Var) {
// GCC master branch:
// https://gcc.gnu.org/g:9361966d80f625c5accc25cbb439f0278dd8b278
// https://gcc.gnu.org/g:c65725eccbabf3b9b5965f27fff2d3b9f6c75930
- StringRef FixDate = "20250520";
-
- if (Invalid)
- return false;
-
- // The format of the revision date is in compressed ISO date format.
- // See https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
- // So we can use string comparison.
- //
- // Checking old versions of libstdc++ is not needed because 15.1 is the first
- // release in which users can access std::format_kind.
- return RevisionDate.size() == 8 && RevisionDate < FixDate;
+ return PP.NeedsStdLibCxxWorkaroundBefore(20250520);
}
} // end anonymous namespace
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 174c8fc59e4fa..e848372a54bd3 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1449,17 +1449,19 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
// happen to be processing that implementation, fake up the g++ ?:
// semantics. See LWG issue 2141 for more information on the bug. The bugs
// are fixed in g++ and libstdc++ 4.9.0 (2014-04-22).
- const DecltypeType *DT = DI->getType()->getAs<DecltypeType>();
- CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D->getDeclContext());
- if (DT && RD && isa<ConditionalOperator>(DT->getUnderlyingExpr()) &&
- DT->isReferenceType() &&
- RD->getEnclosingNamespaceContext() == SemaRef.getStdNamespace() &&
- RD->getIdentifier() && RD->getIdentifier()->isStr("common_type") &&
- D->getIdentifier() && D->getIdentifier()->isStr("type") &&
- SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc()))
- // Fold it to the (non-reference) type which g++ would have produced.
- DI = SemaRef.Context.getTrivialTypeSourceInfo(
- DI->getType().getNonReferenceType());
+ if (SemaRef.getPreprocessor().NeedsStdLibCxxWorkaroundBefore(20140422)) {
+ const DecltypeType *DT = DI->getType()->getAs<DecltypeType>();
+ CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D->getDeclContext());
+ if (DT && RD && isa<ConditionalOperator>(DT->getUnderlyingExpr()) &&
+ DT->isReferenceType() &&
+ RD->getEnclosingNamespaceContext() == SemaRef.getStdNamespace() &&
+ RD->getIdentifier() && RD->getIdentifier()->isStr("common_type") &&
+ D->getIdentifier() && D->getIdentifier()->isStr("type") &&
+ SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc()))
+ // Fold it to the (non-reference) type which g++ would have produced.
+ DI = SemaRef.Context.getTrivialTypeSourceInfo(
+ DI->getType().getNonReferenceType());
+ }
// Create the new typedef
TypedefNameDecl *Typedef;
diff --git a/clang/test/SemaCXX/libstdcxx_common_type_hack.cpp b/clang/test/SemaCXX/libstdcxx_common_type_hack.cpp
index e9cb22f9dabfa..0e7ba5d6ef0c9 100644
--- a/clang/test/SemaCXX/libstdcxx_common_type_hack.cpp
+++ b/clang/test/SemaCXX/libstdcxx_common_type_hack.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -D__GLIBCXX__=20100000L
// This is a test for an egregious hack in Clang that works around
// an issue with GCC's <type_traits> implementation. std::common_type
diff --git a/clang/test/SemaCXX/libstdcxx_explicit_init_list_hack.cpp b/clang/test/SemaCXX/libstdcxx_explicit_init_list_hack.cpp
index f9e0a5c0a1f02..c85af1b1cd567 100644
--- a/clang/test/SemaCXX/libstdcxx_explicit_init_list_hack.cpp
+++ b/clang/test/SemaCXX/libstdcxx_explicit_init_list_hack.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wsystem-headers %s
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wsystem-headers -D__GLIBCXX__=20100000L %s
// libstdc++4.6 in debug mode has explicit default constructors.
// stlport has this for all containers.
diff --git a/clang/test/SemaCXX/libstdcxx_pair_swap_hack.cpp b/clang/test/SemaCXX/libstdcxx_pair_swap_hack.cpp
index dff599b6d5b66..6b8ca4f740914 100644
--- a/clang/test/SemaCXX/libstdcxx_pair_swap_hack.cpp
+++ b/clang/test/SemaCXX/libstdcxx_pair_swap_hack.cpp
@@ -7,20 +7,20 @@
// The same problem afflicts a bunch of other class templates. Those
// affected are array, pair, priority_queue, stack, and queue.
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=array
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=array -DPR28423
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=pair
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=priority_queue
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=stack
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=queue
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=array
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=array -DPR28423
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=pair
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=priority_queue
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=stack
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=queue
//
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=array -DNAMESPACE=__debug
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=array -DNAMESPACE=__profile
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=array -DNAMESPACE=__debug
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=array -DNAMESPACE=__profile
// MSVC's standard library uses a very similar pattern that relies on delayed
// parsing of exception specifications.
//
-// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -DCLASS=array -DMSVC
+// RUN: %clang_cc1 -fsyntax-only %s -std=c++11 -verify -fexceptions -fcxx-exceptions -D__GLIBCXX__=20100000L -DCLASS=array -DMSVC
#ifdef BE_THE_HEADER
More information about the cfe-commits
mailing list