[llvm-branch-commits] [clang] [llvm] Add format string handling (PR #196568)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 8 09:26:42 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
Author: Abhina Sree (abhina-sree)
<details>
<summary>Changes</summary>
---
Patch is 42.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/196568.diff
18 Files Affected:
- (modified) clang/include/clang/AST/Expr.h (+6)
- (modified) clang/include/clang/AST/FormatString.h (+7-5)
- (modified) clang/include/clang/Basic/TargetInfo.h (+3)
- (modified) clang/include/clang/Lex/TextEncodingConfig.h (+2-1)
- (modified) clang/include/clang/Sema/Sema.h (+1-1)
- (modified) clang/lib/AST/Expr.cpp (+14)
- (modified) clang/lib/AST/FormatString.cpp (+46-40)
- (modified) clang/lib/AST/FormatStringParsing.h (+25-11)
- (modified) clang/lib/AST/PrintfFormatString.cpp (+58-31)
- (modified) clang/lib/AST/ScanfFormatString.cpp (+15-8)
- (modified) clang/lib/Basic/TargetInfo.cpp (+3)
- (modified) clang/lib/Frontend/CompilerInstance.cpp (+1-1)
- (modified) clang/lib/Lex/TextEncodingConfig.cpp (+10-1)
- (modified) clang/lib/Sema/SemaChecking.cpp (+33-21)
- (modified) clang/lib/Sema/SemaExpr.cpp (+3-2)
- (modified) clang/test/CodeGen/systemz-charset.c (+2)
- (modified) llvm/include/llvm/Support/TextEncoding.h (+10)
- (modified) llvm/lib/Support/TextEncoding.cpp (+19)
``````````diff
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 393fe275c6269..d01afcff4095d 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -28,6 +28,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/TextEncodingConfig.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
@@ -2066,6 +2067,11 @@ class PredefinedExpr final
return getIdentKindName(getIdentKind());
}
+ static std::string
+ ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl,
+ TextEncodingConfig &TEC,
+ bool ForceElaboratedPrinting = false);
+
static std::string ComputeName(PredefinedIdentKind IK,
const Decl *CurrentDecl,
bool ForceElaboratedPrinting = false);
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index a3382e1a1d007..a24ade2d71ee9 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -19,6 +19,7 @@
#define LLVM_CLANG_AST_FORMATSTRING_H
#include "clang/AST/CanonicalType.h"
+#include "llvm/Support/TextEncoding.h"
#include <optional>
namespace clang {
@@ -728,7 +729,8 @@ class FormatStringHandler {
virtual bool HandleInvalidPrintfConversionSpecifier(
const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
- unsigned specifierLen) {
+ unsigned specifierLen,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
return true;
}
@@ -744,10 +746,10 @@ class FormatStringHandler {
// Scanf-specific handlers.
- virtual bool
- HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS,
- const char *startSpecifier,
- unsigned specifierLen) {
+ virtual bool HandleInvalidScanfConversionSpecifier(
+ const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier,
+ unsigned specifierLen,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
return true;
}
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 9f7d2a17a0f8a..ec7d4fcd4d8e3 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -38,6 +38,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/TextEncoding.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
@@ -323,6 +324,8 @@ class TargetInfo : public TransferrableTargetInfo,
virtual ~TargetInfo();
+ llvm::TextEncodingConverter *FormatStrConverter;
+
/// Retrieve the target options.
TargetOptions &getTargetOpts() const {
assert(TargetOpts && "Missing target options");
diff --git a/clang/include/clang/Lex/TextEncodingConfig.h b/clang/include/clang/Lex/TextEncodingConfig.h
index 09967a81beeed..f4ef578eb2991 100644
--- a/clang/include/clang/Lex/TextEncodingConfig.h
+++ b/clang/include/clang/Lex/TextEncodingConfig.h
@@ -26,7 +26,8 @@ class TextEncodingConfig {
llvm::TextEncodingConverter *getConverter(ConversionAction Action) const;
static std::error_code
setConvertersFromOptions(TextEncodingConfig &TEC,
- const clang::LangOptions &Opts);
+ const clang::LangOptions &Opts,
+ clang::TargetInfo &TInfo);
llvm::StringRef getExecEncoding() { return ExecEncoding; }
};
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index e2bc5593efa97..8ac5cc175fd2f 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -55,7 +55,7 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/TypeTraits.h"
-#include "clang/Lex/LiteralConverter.h"
+#include "clang/Lex/TextEncodingConfig.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/Attr.h"
#include "clang/Sema/CleanupInfo.h"
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 64d61dbc3d128..e067df4cefd7b 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -668,6 +668,20 @@ StringRef PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) {
llvm_unreachable("Unknown ident kind for PredefinedExpr");
}
+std::string PredefinedExpr::ComputeNameAndTranslate(
+ PredefinedIdentKind IK, const Decl *CurrentDecl, TextEncodingConfig &TEC,
+ bool ForceElaboratedPrinting) {
+ using namespace clang::charinfo;
+ std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting);
+ llvm::TextEncodingConverter *Converter = TEC.getConverter(CA_ToExecEncoding);
+ if (Converter) {
+ SmallString<128> Converted;
+ Converter->convert(Result, Converted);
+ Result = std::string(Converted);
+ }
+ return Result;
+}
+
// FIXME: Maybe this should use DeclPrinter with a special "print predefined
// expr" policy instead.
std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index 7e1ac0de6dcaf..0d449fb5f0904 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -33,8 +33,9 @@ FormatStringHandler::~FormatStringHandler() {}
// scanf format strings.
//===----------------------------------------------------------------------===//
-OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg,
- const char *E) {
+OptionalAmount clang::analyze_format_string::ParseAmount(
+ const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
const char *I = Beg;
UpdateOnReturn<const char *> UpdateBeg(Beg, I);
@@ -42,7 +43,7 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg,
bool hasDigits = false;
for (; I != E; ++I) {
- char c = *I;
+ char c = FormatStrConverter.convert(*I);
if (c >= '0' && c <= '9') {
hasDigits = true;
accumulator = (accumulator * 10) + (c - '0');
@@ -60,21 +61,22 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg,
}
OptionalAmount clang::analyze_format_string::ParseNonPositionAmount(
- const char *&Beg, const char *E, unsigned &argIndex) {
- if (*Beg == '*') {
+ const char *&Beg, const char *E, unsigned &argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
+ if (FormatStrConverter.convert(*Beg) == '*') {
++Beg;
return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
}
- return ParseAmount(Beg, E);
+ return ParseAmount(Beg, E, FormatStrConverter);
}
OptionalAmount clang::analyze_format_string::ParsePositionAmount(
FormatStringHandler &H, const char *Start, const char *&Beg, const char *E,
- PositionContext p) {
- if (*Beg == '*') {
+ PositionContext p, const llvm::TextEncodingConverter &FormatStrConverter) {
+ if (FormatStrConverter.convert(*Beg) == '*') {
const char *I = Beg + 1;
- const OptionalAmount &Amt = ParseAmount(I, E);
+ const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter);
if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
H.HandleInvalidPosition(Beg, I - Beg, p);
@@ -89,7 +91,7 @@ OptionalAmount clang::analyze_format_string::ParsePositionAmount(
assert(Amt.getHowSpecified() == OptionalAmount::Constant);
- if (*I == '$') {
+ if (FormatStrConverter.convert(*I) == '$') {
// Handle positional arguments
// Special case: '*0$', since this is an easy mistake.
@@ -109,18 +111,21 @@ OptionalAmount clang::analyze_format_string::ParsePositionAmount(
return OptionalAmount(false);
}
- return ParseAmount(Beg, E);
+ return ParseAmount(Beg, E, FormatStrConverter);
}
bool clang::analyze_format_string::ParseFieldWidth(
FormatStringHandler &H, FormatSpecifier &CS, const char *Start,
- const char *&Beg, const char *E, unsigned *argIndex) {
+ const char *&Beg, const char *E, unsigned *argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
// FIXME: Support negative field widths.
if (argIndex) {
- CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
+ CS.setFieldWidth(
+ ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter));
} else {
const OptionalAmount Amt = ParsePositionAmount(
- H, Start, Beg, E, analyze_format_string::FieldWidthPos);
+ H, Start, Beg, E, analyze_format_string::FieldWidthPos,
+ FormatStrConverter);
if (Amt.isInvalid())
return true;
@@ -129,14 +134,13 @@ bool clang::analyze_format_string::ParseFieldWidth(
return false;
}
-bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
- FormatSpecifier &FS,
- const char *Start,
- const char *&Beg,
- const char *E) {
+bool clang::analyze_format_string::ParseArgPosition(
+ FormatStringHandler &H, FormatSpecifier &FS, const char *Start,
+ const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
const char *I = Beg;
- const OptionalAmount &Amt = ParseAmount(I, E);
+ const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter);
if (I == E) {
// No more characters left?
@@ -144,7 +148,8 @@ bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
return true;
}
- if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
+ if (Amt.getHowSpecified() == OptionalAmount::Constant &&
+ FormatStrConverter.convert(*(I++)) == '$') {
// Warn that positional arguments are non-standard.
H.HandlePosition(Start, I - Start);
@@ -165,16 +170,15 @@ bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
return false;
}
-bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
- FormatSpecifier &FS,
- const char *&I,
- const char *E,
- const LangOptions &LO) {
+bool clang::analyze_format_string::ParseVectorModifier(
+ FormatStringHandler &H, FormatSpecifier &FS, const char *&I, const char *E,
+ const LangOptions &LO,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
if (!LO.OpenCL)
return false;
const char *Start = I;
- if (*I == 'v') {
+ if (FormatStrConverter.convert(*I) == 'v') {
++I;
if (I == E) {
@@ -182,7 +186,7 @@ bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
return true;
}
- OptionalAmount NumElts = ParseAmount(I, E);
+ OptionalAmount NumElts = ParseAmount(I, E, FormatStrConverter);
if (NumElts.getHowSpecified() != OptionalAmount::Constant) {
H.HandleIncompleteSpecifier(Start, E - Start);
return true;
@@ -194,22 +198,20 @@ bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
return false;
}
-bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
- const char *&I,
- const char *E,
- const LangOptions &LO,
- bool IsScanf) {
+bool clang::analyze_format_string::ParseLengthModifier(
+ FormatSpecifier &FS, const char *&I, const char *E, const LangOptions &LO,
+ const llvm::TextEncodingConverter &FormatStrConverter, bool IsScanf) {
LengthModifier::Kind lmKind = LengthModifier::None;
const char *lmPosition = I;
- switch (*I) {
+ switch (FormatStrConverter.convert(*I)) {
default:
return false;
case 'h':
++I;
- if (I != E && *I == 'h') {
+ if (I != E && FormatStrConverter.convert(*I) == 'h') {
++I;
lmKind = LengthModifier::AsChar;
- } else if (I != E && *I == 'l' && LO.OpenCL) {
+ } else if (I != E && FormatStrConverter.convert(*I) == 'l' && LO.OpenCL) {
++I;
lmKind = LengthModifier::AsShortLong;
} else {
@@ -218,7 +220,7 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
break;
case 'l':
++I;
- if (I != E && *I == 'l') {
+ if (I != E && FormatStrConverter.convert(*I) == 'l') {
++I;
lmKind = LengthModifier::AsLongLong;
} else {
@@ -251,7 +253,9 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
// be parsed as the GNU extension 'a' length modifier. If not, this
// will be parsed as a conversion specifier.
++I;
- if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
+ if (I != E && (FormatStrConverter.convert(*I) == 's' ||
+ FormatStrConverter.convert(*I) == 'S' ||
+ FormatStrConverter.convert(*I) == '[')) {
lmKind = LengthModifier::AsAllocate;
break;
}
@@ -269,7 +273,8 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
// scanf: AsInt64
case 'I':
if (I + 1 != E && I + 2 != E) {
- if (I[1] == '6' && I[2] == '4') {
+ if (FormatStrConverter.convert(I[1]) == '6' &&
+ FormatStrConverter.convert(I[2]) == '4') {
I += 3;
lmKind = LengthModifier::AsInt64;
break;
@@ -277,7 +282,8 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
if (IsScanf)
return false;
- if (I[1] == '3' && I[2] == '2') {
+ if (FormatStrConverter.convert(I[1]) == '3' &&
+ FormatStrConverter.convert(I[2]) == '2') {
I += 3;
lmKind = LengthModifier::AsInt32;
break;
diff --git a/clang/lib/AST/FormatStringParsing.h b/clang/lib/AST/FormatStringParsing.h
index 401528481a9d6..531bc291e0b5b 100644
--- a/clang/lib/AST/FormatStringParsing.h
+++ b/clang/lib/AST/FormatStringParsing.h
@@ -35,29 +35,43 @@ template <typename T> class UpdateOnReturn {
namespace analyze_format_string {
-OptionalAmount ParseAmount(const char *&Beg, const char *E);
-OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
- unsigned &argIndex);
+OptionalAmount
+ParseAmount(const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter);
-OptionalAmount ParsePositionAmount(FormatStringHandler &H, const char *Start,
- const char *&Beg, const char *E,
- PositionContext p);
+OptionalAmount
+ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter);
+
+OptionalAmount
+ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg,
+ const char *E, PositionContext p,
+ const llvm::TextEncodingConverter &FormatStrConverter);
+
+OptionalAmount
+ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg,
+ const char *E, PositionContext p,
+ const llvm::TextEncodingConverter &FormatStrConverter);
bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &CS,
const char *Start, const char *&Beg, const char *E,
- unsigned *argIndex);
+ unsigned *argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter);
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS,
- const char *Start, const char *&Beg, const char *E);
+ const char *Start, const char *&Beg, const char *E,
+ const llvm::TextEncodingConverter &FormatStrConverter);
bool ParseVectorModifier(FormatStringHandler &H, FormatSpecifier &FS,
- const char *&Beg, const char *E,
- const LangOptions &LO);
+ const char *&Beg, const char *E, const LangOptions &LO,
+ const llvm::TextEncodingConverter &FormatStrConverter);
/// Returns true if a LengthModifier was parsed and installed in the
/// FormatSpecifier& argument, and false otherwise.
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
- const LangOptions &LO, bool IsScanf = false);
+ const LangOptions &LO,
+ const llvm::TextEncodingConverter &FormatStrConverter,
+ bool IsScanf = false);
/// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8
/// string; check that it won't go further than \p FmtStrEnd and write
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 6610a2de9e083..7efcc554ec136 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -35,14 +35,17 @@ typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
using analyze_format_string::ParseNonPositionAmount;
-static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
- const char *Start, const char *&Beg, const char *E,
- unsigned *argIndex) {
+static bool
+ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, const char *Start,
+ const char *&Beg, const char *E, unsigned *argIndex,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
if (argIndex) {
- FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
+ FS.setPrecision(
+ ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter));
} else {
const OptionalAmount Amt = ParsePositionAmount(
- H, Start, Beg, E, analyze_format_string::PrecisionPos);
+ H, Start, Beg, E, analyze_format_string::PrecisionPos,
+ FormatStrConverter);
if (Amt.isInvalid())
return true;
FS.setPrecision(Amt);
@@ -50,11 +53,14 @@ static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
return false;
}
-static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
- const char *FlagBeg, const char *E, bool Warn) {
+static bool
+ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, const char *FlagBeg,
+ const char *E, bool Warn,
+ const llvm::TextEncodingConverter &FormatStrConverter) {
StringRef Flag(FlagBeg, E - FlagBeg);
// Currently there is only one flag.
- if (Flag == "tt") {
+ if (Flag.size() == 2 && FormatStrConverter.convert(FlagBeg[0]) == 't' &&
+ FormatStrConverter.convert(FlagBeg[1]) == 't') {
FS.setHasObjCTechnicalTerm(FlagBeg);
return false;
}
@@ -81,6 +87,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E,
const char *Start = nullptr;
UpdateOnReturn<const char *> UpdateBeg(Beg, I);
+ const llvm::TextEncodingConverter &FormatStrConverter =
+ *Target.FormatStrConverter;
// Look for a '%' character that indicates the start of a format specifier.
for (; I != E; ++I) {
char c = *I;
@@ -89,7 +97,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E,
H.HandleNullChar(I);
return true;
}
- if (c == '%') {
+ if (FormatStrConverter.convert(c) == '%') {
Start = I++; // Record the start of the format specifier.
break;
}
@@ -107,7 +115,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E,
}
PrintfSpecifier FS;
- if (ParseArgPosition(H, FS, Start, I, E))
+ if (ParseArgPosition(H, FS, Start, I, E, FormatStrConverter))
return true;
if (I == E) {
@@ -117,13 +125,17 @@ ParsePrintfSpecifier(FormatStrin...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/196568
More information about the llvm-branch-commits mailing list