[libc] [llvm] [clang-tools-extra] [libcxx] [compiler-rt] [lld] [libunwind] [mlir] [clang] [lldb] [flang] Fix clang to recognize new C23 modifiers %w and %wf when printing and scanning (PR #71771)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 23:49:26 PST 2023
https://github.com/ZijunZhaoCCK updated https://github.com/llvm/llvm-project/pull/71771
>From 06c4cf02dfb4b20c8349c5f3c7209276f6d56edf Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Thu, 9 Nov 2023 02:21:46 +0000
Subject: [PATCH 1/5] Fix clang to recognize new C23 modifiers %w and %wf when
printing
---
clang/include/clang/AST/FormatString.h | 16 +++++++-
clang/lib/AST/FormatString.cpp | 52 +++++++++++++++++++++++++-
clang/lib/AST/PrintfFormatString.cpp | 19 ++++++++++
clang/test/Sema/format-strings-ms.c | 28 ++++++++++++++
4 files changed, 112 insertions(+), 3 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5c4ad9baaef60..6a886854650f1 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -81,8 +81,10 @@ class LengthModifier {
AsLongDouble, // 'L'
AsAllocate, // for '%as', GNU extension to C90 scanf
AsMAllocate, // for '%ms', GNU extension to scanf
- AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
- AsWideChar = AsLong // for '%ls', only makes sense for printf
+ AsWide, // 'w': (1) MSVCRT extension, like 'l' but only for c, C, s, S, or Z on Windows;
+ // (2) C23, for b, d, i, o, u, x, or X when followed by a size (8, 16, 32, or 64)
+ AsWideFast, // 'wf' (for b, d, i, o, u, x, or X)
+ AsWideChar = AsLong, // for '%ls', only makes sense for printf
};
LengthModifier()
@@ -417,6 +419,7 @@ class FormatSpecifier {
/// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
bool UsesPositionalArg;
unsigned argIndex;
+ unsigned size;
public:
FormatSpecifier(bool isPrintf)
: CS(isPrintf), VectorNumElts(false),
@@ -460,6 +463,15 @@ class FormatSpecifier {
FieldWidth = Amt;
}
+ void setSize(unsigned s) {
+ size = s;
+ }
+
+ unsigned getSize() const {
+ return size;
+ }
+
+
bool usesPositionalArg() const { return UsesPositionalArg; }
bool hasValidLengthModifier(const TargetInfo &Target,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index e0c9e18cfe3a2..ebc136e780717 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -286,7 +286,33 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
lmKind = LengthModifier::AsInt3264;
break;
case 'w':
- lmKind = LengthModifier::AsWide; ++I; break;
+ ++I;
+ if (I == E) return false;
+ if (*I == 'f') {
+ lmKind = LengthModifier::AsWideFast;
+ ++I;
+ } else {
+ lmKind = LengthModifier::AsWide;
+ }
+
+ if (I == E) return false;
+ int s = 0;
+ while (unsigned(*I - '0') <= 9) {
+ s = 10 * s + unsigned(*I - '0');
+ ++I;
+ }
+
+ // s == 0 is MSVCRT case, like l but only for c, C, s, S, or Z on windows
+ // s != 0 for b, d, i, o, u, x, or X when a size followed(like 8, 16, 32 or 64)
+ if (s != 0) {
+ std::set<int> supported_list {8, 16, 32, 64};
+ if (supported_list.count(s) == 0) {
+ return false;
+ }
+ FS.setSize(s);
+ }
+
+ break;
}
LengthModifier lm(lmPosition, lmKind);
FS.setLengthModifier(lm);
@@ -703,6 +729,8 @@ analyze_format_string::LengthModifier::toString() const {
return "m";
case AsWide:
return "w";
+ case AsWideFast:
+ return "wf";
case None:
return "";
}
@@ -970,6 +998,27 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target,
case ConversionSpecifier::SArg:
case ConversionSpecifier::ZArg:
return Target.getTriple().isOSMSVCRT();
+ case ConversionSpecifier::bArg:
+ case ConversionSpecifier::dArg:
+ case ConversionSpecifier::iArg:
+ case ConversionSpecifier::oArg:
+ case ConversionSpecifier::uArg:
+ case ConversionSpecifier::xArg:
+ case ConversionSpecifier::XArg:
+ return true;
+ default:
+ return false;
+ }
+ case LengthModifier::AsWideFast:
+ switch (CS.getKind()) {
+ case ConversionSpecifier::bArg:
+ case ConversionSpecifier::dArg:
+ case ConversionSpecifier::iArg:
+ case ConversionSpecifier::oArg:
+ case ConversionSpecifier::uArg:
+ case ConversionSpecifier::xArg:
+ case ConversionSpecifier::XArg:
+ return true;
default:
return false;
}
@@ -996,6 +1045,7 @@ bool FormatSpecifier::hasStandardLengthModifier() const {
case LengthModifier::AsInt3264:
case LengthModifier::AsInt64:
case LengthModifier::AsWide:
+ case LengthModifier::AsWideFast:
case LengthModifier::AsShortLong: // ???
return false;
}
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index f0b9d0ecaf234..4b9111e8bcf50 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -537,7 +537,16 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
case LengthModifier::AsAllocate:
case LengthModifier::AsMAllocate:
+ return ArgType::Invalid();
case LengthModifier::AsWide:
+ case LengthModifier::AsWideFast:
+ int s = getSize();
+ bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.LongLongTy : Ctx.IntTy;
+ if (s == 8) return Ctx.CharTy;
+ if (s == 16) return fast? fastType : Ctx.ShortTy;
+ if (s == 32) return fast? fastType : Ctx.IntTy;
+ if (s == 64) return Ctx.LongLongTy;
return ArgType::Invalid();
}
@@ -572,7 +581,16 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
case LengthModifier::AsAllocate:
case LengthModifier::AsMAllocate:
+ return ArgType::Invalid();
case LengthModifier::AsWide:
+ case LengthModifier::AsWideFast:
+ int s = getSize();
+ bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.UnsignedLongLongTy : Ctx.UnsignedIntTy;
+ if (s == 8) return Ctx.UnsignedCharTy;
+ if (s == 16) return fast? fastType : Ctx.UnsignedShortTy;
+ if (s == 32) return fast? fastType : Ctx.UnsignedIntTy;
+ if (s == 64) return Ctx.UnsignedLongLongTy;
return ArgType::Invalid();
}
@@ -621,6 +639,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
case LengthModifier::AsInt3264:
case LengthModifier::AsInt64:
case LengthModifier::AsWide:
+ case LengthModifier::AsWideFast:
return ArgType::Invalid();
case LengthModifier::AsShortLong:
llvm_unreachable("only used for OpenCL which doesn not handle nArg");
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index 697032673d4e7..59ae930bb734e 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -1,6 +1,7 @@
// RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 %s
// RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -Wformat-non-iso -DNON_ISO_WARNING %s
+#include <stdint.h>
int printf(const char *format, ...) __attribute__((format(printf, 1, 2)));
int scanf(const char * restrict, ...) ;
typedef unsigned short wchar_t;
@@ -85,4 +86,31 @@ void z_test(void *p) {
scanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}}
}
+void w_int_test(void) {
+ int8_t a = 0b101;
+ int16_t b = 2;
+ uint32_t c = 123;
+ int64_t d = 0x3b;
+
+ // for %w
+ printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
+ printf("%w16i", b);
+ printf("%w32u", c);
+ printf("%w64x", d);
+
+}
+
+void wf_test(void) {
+ int_fast8_t a = 0b101;
+ uint_fast16_t b = 2;
+ int_fast32_t c = 021;
+ int_fast64_t d = 0x3a;
+
+ // for %wf
+ printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
+ printf("%wf16u", b);
+ printf("%wf32o", c);
+ printf("%wf64X", d);
+}
+
#endif
>From 2ec84a88a9ba6e4576a855b419dd6bafa9f3d721 Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Tue, 14 Nov 2023 00:38:41 +0000
Subject: [PATCH 2/5] Update code and tests. %w and %wf are available in
printf() and scanf()
---
clang/include/clang/AST/FormatString.h | 4 ++-
clang/lib/AST/PrintfFormatString.cpp | 34 +++++++++++++++++---------
clang/lib/AST/ScanfFormatString.cpp | 14 +++++++++--
clang/test/Sema/format-strings-ms.c | 25 ++++++++++++-------
4 files changed, 53 insertions(+), 24 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 6a886854650f1..2e48a8ddfde4d 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -471,7 +471,6 @@ class FormatSpecifier {
return size;
}
-
bool usesPositionalArg() const { return UsesPositionalArg; }
bool hasValidLengthModifier(const TargetInfo &Target,
@@ -792,6 +791,9 @@ bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
const LangOptions &LO,
const TargetInfo &Target);
+ArgType wToArgType(int size, bool fast, ASTContext &C);
+ArgType wToArgTypeUnsigned(int size, bool fast, ASTContext &C);
+
} // end analyze_format_string namespace
} // end clang namespace
#endif
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 4b9111e8bcf50..e6e47403198af 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -484,6 +484,26 @@ bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
return false;
}
+ArgType clang::analyze_format_string::wToArgType(
+ int size, bool fast, ASTContext &C) {
+ ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
+ if (size == 8) return C.CharTy;
+ if (size == 16) return fast? fastType : C.ShortTy;
+ if (size == 32) return fast? fastType : C.IntTy;
+ if (size == 64) return C.LongLongTy;
+ return ArgType::Invalid();
+}
+
+ArgType clang::analyze_format_string::wToArgTypeUnsigned(
+ int size, bool fast, ASTContext &C) {
+ ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
+ if (size == 8) return C.UnsignedCharTy;
+ if (size == 16) return fast? fastType : C.UnsignedShortTy;
+ if (size == 32) return fast? fastType : C.UnsignedIntTy;
+ if (size == 64) return C.UnsignedLongLongTy;
+ return ArgType::Invalid();
+}
+
//===----------------------------------------------------------------------===//
// Methods on PrintfSpecifier.
//===----------------------------------------------------------------------===//
@@ -542,12 +562,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
case LengthModifier::AsWideFast:
int s = getSize();
bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.LongLongTy : Ctx.IntTy;
- if (s == 8) return Ctx.CharTy;
- if (s == 16) return fast? fastType : Ctx.ShortTy;
- if (s == 32) return fast? fastType : Ctx.IntTy;
- if (s == 64) return Ctx.LongLongTy;
- return ArgType::Invalid();
+ return clang::analyze_format_string::wToArgType(s, fast, Ctx);
}
if (CS.isUIntArg())
@@ -586,12 +601,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
case LengthModifier::AsWideFast:
int s = getSize();
bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.UnsignedLongLongTy : Ctx.UnsignedIntTy;
- if (s == 8) return Ctx.UnsignedCharTy;
- if (s == 16) return fast? fastType : Ctx.UnsignedShortTy;
- if (s == 32) return fast? fastType : Ctx.UnsignedIntTy;
- if (s == 64) return Ctx.UnsignedLongLongTy;
- return ArgType::Invalid();
+ return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
}
if (CS.isDoubleArg()) {
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index 64c430e623b57..e640bc2de4d25 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -261,9 +261,13 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
case LengthModifier::AsMAllocate:
case LengthModifier::AsInt32:
case LengthModifier::AsInt3264:
- case LengthModifier::AsWide:
case LengthModifier::AsShortLong:
return ArgType::Invalid();
+ case LengthModifier::AsWide:
+ case LengthModifier::AsWideFast:
+ int s = getSize();
+ bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ return clang::analyze_format_string::wToArgType(s, fast, Ctx);
}
llvm_unreachable("Unsupported LengthModifier Type");
@@ -303,9 +307,15 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
case LengthModifier::AsMAllocate:
case LengthModifier::AsInt32:
case LengthModifier::AsInt3264:
- case LengthModifier::AsWide:
case LengthModifier::AsShortLong:
return ArgType::Invalid();
+ case LengthModifier::AsWide:
+ case LengthModifier::AsWideFast:
+ int s = getSize();
+ bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ if (CS.getKind() == ConversionSpecifier::uArg or CS.getKind() == ConversionSpecifier::UArg)
+ return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
+ return clang::analyze_format_string::wToArgType(s, fast, Ctx);
}
llvm_unreachable("Unsupported LengthModifier Type");
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index 59ae930bb734e..bca824533d128 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -87,30 +87,37 @@ void z_test(void *p) {
}
void w_int_test(void) {
- int8_t a = 0b101;
- int16_t b = 2;
- uint32_t c = 123;
- int64_t d = 0x3b;
+ int8_t a;
+ int16_t b;
+ uint32_t c;
+ int64_t d;
// for %w
printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
printf("%w16i", b);
printf("%w32u", c);
printf("%w64x", d);
-
+ scanf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+ scanf("%w16i", b); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
+ scanf("%w32u", c);
+ scanf("%w64x", d);
}
void wf_test(void) {
- int_fast8_t a = 0b101;
- uint_fast16_t b = 2;
- int_fast32_t c = 021;
- int_fast64_t d = 0x3a;
+ int_fast8_t a;
+ uint_fast16_t b;
+ int_fast32_t c;
+ int_fast64_t d;
// for %wf
printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
printf("%wf16u", b);
printf("%wf32o", c);
printf("%wf64X", d);
+ scanf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+ scanf("%wf16u", b);
+ scanf("%wf32o", c);
+ scanf("%wf64X", d);
}
#endif
>From 1431133f1e48bb58fa407e4a6d985fca24e9410b Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Tue, 14 Nov 2023 00:38:41 +0000
Subject: [PATCH 3/5] Update code and tests. %w and %wf are available in
printf() and scanf()
---
clang/include/clang/AST/FormatString.h | 14 ++++++-------
clang/lib/AST/FormatString.cpp | 2 +-
clang/lib/AST/PrintfFormatString.cpp | 28 +++++++++++++-------------
clang/lib/AST/ScanfFormatString.cpp | 4 ++--
4 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 2e48a8ddfde4d..dba973ffcaa4e 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -419,7 +419,7 @@ class FormatSpecifier {
/// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
bool UsesPositionalArg;
unsigned argIndex;
- unsigned size;
+ unsigned ExplicitlyFixedSize;
public:
FormatSpecifier(bool isPrintf)
: CS(isPrintf), VectorNumElts(false),
@@ -463,12 +463,12 @@ class FormatSpecifier {
FieldWidth = Amt;
}
- void setSize(unsigned s) {
- size = s;
+ void setExplicitlyFixedSize(unsigned s) {
+ ExplicitlyFixedSize = s;
}
- unsigned getSize() const {
- return size;
+ unsigned getExplicitlyFixedSize() const {
+ return ExplicitlyFixedSize;
}
bool usesPositionalArg() const { return UsesPositionalArg; }
@@ -791,8 +791,8 @@ bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
const LangOptions &LO,
const TargetInfo &Target);
-ArgType wToArgType(int size, bool fast, ASTContext &C);
-ArgType wToArgTypeUnsigned(int size, bool fast, ASTContext &C);
+ArgType wToArgType(int Size, bool Fast, ASTContext &C);
+ArgType wToArgTypeUnsigned(int Size, bool Fast, ASTContext &C);
} // end analyze_format_string namespace
} // end clang namespace
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index ebc136e780717..88d0f1c98ed0d 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -309,7 +309,7 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
if (supported_list.count(s) == 0) {
return false;
}
- FS.setSize(s);
+ FS.setExplicitlyFixedSize(s);
}
break;
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index e6e47403198af..ad92fb3aab97d 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -485,22 +485,22 @@ bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
}
ArgType clang::analyze_format_string::wToArgType(
- int size, bool fast, ASTContext &C) {
- ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
- if (size == 8) return C.CharTy;
- if (size == 16) return fast? fastType : C.ShortTy;
- if (size == 32) return fast? fastType : C.IntTy;
- if (size == 64) return C.LongLongTy;
+ int Size, bool Fast, ASTContext &C) {
+ ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
+ if (Size == 8) return C.CharTy;
+ if (Size == 16) return Fast? FastType : C.ShortTy;
+ if (Size == 32) return Fast? FastType : C.IntTy;
+ if (Size == 64) return C.LongLongTy;
return ArgType::Invalid();
}
ArgType clang::analyze_format_string::wToArgTypeUnsigned(
- int size, bool fast, ASTContext &C) {
- ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
- if (size == 8) return C.UnsignedCharTy;
- if (size == 16) return fast? fastType : C.UnsignedShortTy;
- if (size == 32) return fast? fastType : C.UnsignedIntTy;
- if (size == 64) return C.UnsignedLongLongTy;
+ int Size, bool Fast, ASTContext &C) {
+ ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
+ if (Size == 8) return C.UnsignedCharTy;
+ if (Size == 16) return Fast? FastType : C.UnsignedShortTy;
+ if (Size == 32) return Fast? FastType : C.UnsignedIntTy;
+ if (Size == 64) return C.UnsignedLongLongTy;
return ArgType::Invalid();
}
@@ -560,7 +560,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getSize();
+ int s = getExplicitlyFixedSize();
bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
return clang::analyze_format_string::wToArgType(s, fast, Ctx);
}
@@ -599,7 +599,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getSize();
+ int s = getExplicitlyFixedSize();
bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
}
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index e640bc2de4d25..6f856f1b0bedb 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -265,7 +265,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getSize();
+ int s = getExplicitlyFixedSize();
bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
return clang::analyze_format_string::wToArgType(s, fast, Ctx);
}
@@ -311,7 +311,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getSize();
+ int s = getExplicitlyFixedSize();
bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
if (CS.getKind() == ConversionSpecifier::uArg or CS.getKind() == ConversionSpecifier::UArg)
return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
>From b4ff31c11d64079456cda63bed1a734958754b10 Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Sat, 2 Dec 2023 01:35:09 +0000
Subject: [PATCH 4/5] 1. add one more warning about unsupported sizes 2.
reformat
---
clang/include/clang/AST/FormatString.h | 16 +++++++++++---
.../clang/Basic/DiagnosticSemaKinds.td | 4 ++++
clang/lib/AST/FormatString.cpp | 13 ++++++-----
clang/lib/AST/PrintfFormatString.cpp | 6 ++---
clang/lib/AST/ScanfFormatString.cpp | 6 ++---
clang/lib/Sema/SemaChecking.cpp | 22 +++++++++++++++++++
clang/test/Sema/format-strings-ms.c | 8 +++++++
7 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index dba973ffcaa4e..c3f761ed03d9f 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -420,10 +420,12 @@ class FormatSpecifier {
bool UsesPositionalArg;
unsigned argIndex;
unsigned ExplicitlyFixedSize;
+ bool ExplicitlyFixedSizeValid;
+
public:
FormatSpecifier(bool isPrintf)
: CS(isPrintf), VectorNumElts(false),
- UsesPositionalArg(false), argIndex(0) {}
+ UsesPositionalArg(false), argIndex(0), ExplicitlyFixedSizeValid(true) {}
void setLengthModifier(LengthModifier lm) {
LM = lm;
@@ -463,8 +465,8 @@ class FormatSpecifier {
FieldWidth = Amt;
}
- void setExplicitlyFixedSize(unsigned s) {
- ExplicitlyFixedSize = s;
+ void setExplicitlyFixedSize(unsigned S) {
+ ExplicitlyFixedSize = S;
}
unsigned getExplicitlyFixedSize() const {
@@ -478,6 +480,14 @@ class FormatSpecifier {
bool hasStandardLengthModifier() const;
+ void setExplicitlyFixedSizeValid(bool valid) {
+ ExplicitlyFixedSizeValid = valid;
+ }
+
+ bool isExplicitlyFixedSizeSupported() const {
+ return ExplicitlyFixedSizeValid;
+ }
+
std::optional<LengthModifier> getCorrectedLengthModifier() const;
bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6dfb2d7195203..5d1dcb66d2990 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9707,6 +9707,10 @@ def warn_missing_format_string : Warning<
def warn_scanf_nonzero_width : Warning<
"zero field width in scanf format string is unused">,
InGroup<Format>;
+def warn_format_conversion_size_unsupported: Warning<
+ "format specifies %select{an exact-|a fastest-}0 width integer type with "
+ "invalid bit-width %1">,
+ InGroup<Format>;
def warn_format_conversion_argument_type_mismatch : Warning<
"format specifies type %0 but the argument has "
"%select{type|underlying type}2 %1">,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index 88d0f1c98ed0d..39359c2b18fb4 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -297,18 +297,19 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
if (I == E) return false;
int s = 0;
+ bool MSVCRT = true;
while (unsigned(*I - '0') <= 9) {
+ MSVCRT = false;
s = 10 * s + unsigned(*I - '0');
++I;
}
- // s == 0 is MSVCRT case, like l but only for c, C, s, S, or Z on windows
- // s != 0 for b, d, i, o, u, x, or X when a size followed(like 8, 16, 32 or 64)
- if (s != 0) {
+ // MSVCRT == true: no size follows 'w'; this is the MSVCRT case, like 'l' but only for c, C, s, S, or Z on Windows.
+ // MSVCRT == false: a size follows (supported: 8, 16, 32, or 64); used with b, d, i, o, u, x, or X.
+ if (!MSVCRT) {
std::set<int> supported_list {8, 16, 32, 64};
- if (supported_list.count(s) == 0) {
- return false;
- }
+ if (supported_list.count(s) == 0)
+ FS.setExplicitlyFixedSizeValid(false);
FS.setExplicitlyFixedSize(s);
}
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index ad92fb3aab97d..d4d9a0596620a 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -560,9 +560,9 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getExplicitlyFixedSize();
- bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- return clang::analyze_format_string::wToArgType(s, fast, Ctx);
+ int S = getExplicitlyFixedSize();
+ bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
}
if (CS.isUIntArg())
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index 6f856f1b0bedb..de261a303f322 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -265,9 +265,9 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getExplicitlyFixedSize();
- bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- return clang::analyze_format_string::wToArgType(s, fast, Ctx);
+ int S = getExplicitlyFixedSize();
+ bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
}
llvm_unreachable("Unsupported LengthModifier Type");
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 77c8334f3ca25..7e2f5dc6708c4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -11680,6 +11680,17 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
if (!FS.hasStandardConversionSpecifier(S.getLangOpts()))
HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
+ // Check that the explicitly fixed size is supported.
+ if (!FS.isExplicitlyFixedSizeSupported()){
+ EmitFormatDiagnostic(S.PDiag(
+ diag::warn_format_conversion_size_unsupported)
+ << FS.getLengthModifier().toString()
+ << FS.getExplicitlyFixedSize(),
+ getLocationOfByte(startSpecifier),
+ /*IsStringLocation*/true,
+ getSpecifierRange(startSpecifier, specifierLen));
+ }
+
// The remaining checks depend on the data arguments.
if (ArgPassingKind == Sema::FAPK_VAList)
return true;
@@ -12289,6 +12300,17 @@ bool CheckScanfHandler::HandleScanfSpecifier(
else if (!FS.hasStandardLengthConversionCombination())
HandleInvalidLengthModifier(FS, CS, startSpecifier, specifierLen,
diag::warn_format_non_standard_conversion_spec);
+
+ // Check that the explicitly fixed size is supported.
+ if (!FS.isExplicitlyFixedSizeSupported()){
+ EmitFormatDiagnostic(S.PDiag(
+ diag::warn_format_conversion_size_unsupported)
+ << FS.getLengthModifier().toString()
+ << FS.getExplicitlyFixedSize(),
+ getLocationOfByte(startSpecifier),
+ /*IsStringLocation*/true,
+ getSpecifierRange(startSpecifier, specifierLen));
+ }
if (!FS.hasStandardConversionSpecifier(S.getLangOpts()))
HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index bca824533d128..e6172ee436114 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -101,6 +101,10 @@ void w_int_test(void) {
scanf("%w16i", b); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
scanf("%w32u", c);
scanf("%w64x", d);
+
+ // unsupported size
+ printf("%w92d", a); // expected-warning{{format specifies w width integer type with invalid bit-width 92}}
+ scanf("%w0i", b); // expected-warning{{format specifies w width integer type with invalid bit-width 0}}
}
void wf_test(void) {
@@ -118,6 +122,10 @@ void wf_test(void) {
scanf("%wf16u", b);
scanf("%wf32o", c);
scanf("%wf64X", d);
+
+ // unsupported size
+ printf("%wf0d", a); // expected-warning{{format specifies wf width integer type with invalid bit-width 0}}
+ scanf("%wf35u", b); // expected-warning{{format specifies wf width integer type with invalid bit-width 35}}
}
#endif
>From a5b2d771fc51df5018765896207b76cfee88e39b Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Tue, 5 Dec 2023 07:48:50 +0000
Subject: [PATCH 5/5] Add getFastIntTypeByWidth() function
---
clang/include/clang/AST/FormatString.h | 3 +-
clang/include/clang/Basic/TargetInfo.h | 4 +++
clang/lib/AST/PrintfFormatString.cpp | 48 ++++++++++++++------------
clang/lib/AST/ScanfFormatString.cpp | 10 +++---
clang/lib/Basic/TargetInfo.cpp | 25 ++++++++++++++
clang/test/Sema/format-strings-ms.c | 8 ++---
6 files changed, 65 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index c3f761ed03d9f..7add829ee1cca 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -801,8 +801,7 @@ bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
const LangOptions &LO,
const TargetInfo &Target);
-ArgType wToArgType(int Size, bool Fast, ASTContext &C);
-ArgType wToArgTypeUnsigned(int Size, bool Fast, ASTContext &C);
+ArgType wToArgType(int Size, bool IsSigned, bool Fast, ASTContext &C);
} // end analyze_format_string namespace
} // end clang namespace
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 41f3c2e403cbe..c1e79b5d759ef 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -442,6 +442,10 @@ class TargetInfo : public TransferrableTargetInfo,
virtual IntType getLeastIntTypeByWidth(unsigned BitWidth,
bool IsSigned) const;
+ /// Return the fastest integer type with at least the specified width.
+ virtual IntType getFastIntTypeByWidth(unsigned BitWidth,
+ bool IsSigned, bool Fast) const;
+
/// Return floating point type with specified width. On PPC, there are
/// three possible types for 128-bit floating point: "PPC double-double",
/// IEEE 754R quad precision, and "long double" (which under the covers
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index d4d9a0596620a..f41fba78b5d05 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -485,23 +485,27 @@ bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
}
ArgType clang::analyze_format_string::wToArgType(
- int Size, bool Fast, ASTContext &C) {
- ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
- if (Size == 8) return C.CharTy;
- if (Size == 16) return Fast? FastType : C.ShortTy;
- if (Size == 32) return Fast? FastType : C.IntTy;
- if (Size == 64) return C.LongLongTy;
- return ArgType::Invalid();
-}
-
-ArgType clang::analyze_format_string::wToArgTypeUnsigned(
- int Size, bool Fast, ASTContext &C) {
- ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
- if (Size == 8) return C.UnsignedCharTy;
- if (Size == 16) return Fast? FastType : C.UnsignedShortTy;
- if (Size == 32) return Fast? FastType : C.UnsignedIntTy;
- if (Size == 64) return C.UnsignedLongLongTy;
- return ArgType::Invalid();
+ int Size, bool IsSigned, bool Fast, ASTContext &C) {
+ switch (C.getTargetInfo().getFastIntTypeByWidth(Size, IsSigned, Fast)) {
+ case TargetInfo::SignedChar:
+ return C.SignedCharTy;
+ case TargetInfo::UnsignedChar:
+ return C.UnsignedCharTy;
+ case TargetInfo::SignedShort:
+ return C.ShortTy;
+ case TargetInfo::UnsignedShort:
+ return C.UnsignedShortTy;
+ case TargetInfo::SignedInt:
+ return C.IntTy;
+ case TargetInfo::UnsignedInt:
+ return C.UnsignedIntTy;
+ case TargetInfo::SignedLongLong:
+ return C.LongLongTy;
+ case TargetInfo::UnsignedLongLong:
+ return C.UnsignedLongLongTy;
+ default:
+ return ArgType::Invalid();
+ }
}
//===----------------------------------------------------------------------===//
@@ -561,8 +565,8 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
int S = getExplicitlyFixedSize();
- bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
+ bool Fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ return clang::analyze_format_string::wToArgType(S, true, Fast, Ctx);
}
if (CS.isUIntArg())
@@ -599,9 +603,9 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getExplicitlyFixedSize();
- bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
+ int S = getExplicitlyFixedSize();
+ bool Fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ return clang::analyze_format_string::wToArgType(S, false, Fast, Ctx);
}
if (CS.isDoubleArg()) {
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index de261a303f322..52bb1de117fe1 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -267,7 +267,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
case LengthModifier::AsWideFast:
int S = getExplicitlyFixedSize();
bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
- return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
+ return clang::analyze_format_string::wToArgType(S, true, FAST, Ctx);
}
llvm_unreachable("Unsupported LengthModifier Type");
@@ -311,11 +311,11 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
return ArgType::Invalid();
case LengthModifier::AsWide:
case LengthModifier::AsWideFast:
- int s = getExplicitlyFixedSize();
- bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+ int S = getExplicitlyFixedSize();
+ bool Fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
if (CS.getKind() == ConversionSpecifier::uArg or CS.getKind() == ConversionSpecifier::UArg)
- return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
- return clang::analyze_format_string::wToArgType(s, fast, Ctx);
+ return clang::analyze_format_string::wToArgType(S, false, Fast, Ctx);
+ return clang::analyze_format_string::wToArgType(S, true, Fast, Ctx);
}
llvm_unreachable("Unsupported LengthModifier Type");
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 6cd5d618a4aca..fbc842d492ec3 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -312,6 +312,31 @@ TargetInfo::IntType TargetInfo::getLeastIntTypeByWidth(unsigned BitWidth,
return NoInt;
}
+TargetInfo::IntType TargetInfo::getFastIntTypeByWidth(unsigned BitWidth,
+ bool IsSigned, bool Fast)
+ const {
+ IntType SignedFastType = getTriple().isArch64Bit() ? SignedLongLong : SignedInt;
+ IntType UnSignedFastType = getTriple().isArch64Bit() ?
+ UnsignedLongLong : UnsignedInt;
+ if (getCharWidth() == BitWidth)
+ return IsSigned ? SignedChar : UnsignedChar;
+ if (getShortWidth() == BitWidth) {
+ if (Fast)
+ return IsSigned ? SignedFastType : UnSignedFastType;
+ else
+ return IsSigned ? SignedShort : UnsignedShort;
+ }
+ if (getIntWidth() == BitWidth) {
+ if (Fast)
+ return IsSigned ? SignedFastType : UnSignedFastType;
+ else
+ return IsSigned ? SignedInt : UnsignedInt;
+ }
+ if (getLongLongWidth() == BitWidth)
+ return IsSigned ? SignedLongLong : UnsignedLongLong;
+ return NoInt;
+}
+
FloatModeKind TargetInfo::getRealTypeByWidth(unsigned BitWidth,
FloatModeKind ExplicitType) const {
if (getHalfWidth() == BitWidth)
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index e6172ee436114..89f267a0d4fcd 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -93,11 +93,11 @@ void w_int_test(void) {
int64_t d;
// for %w
- printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
+ printf("%w8b", a);
printf("%w16i", b);
printf("%w32u", c);
printf("%w64x", d);
- scanf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+ scanf("%w8b", a); // expected-warning{{format specifies type 'signed char' but the argument has type 'int'}}
scanf("%w16i", b); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
scanf("%w32u", c);
scanf("%w64x", d);
@@ -114,11 +114,11 @@ void wf_test(void) {
int_fast64_t d;
// for %wf
- printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
+ printf("%wf8b", a);
printf("%wf16u", b);
printf("%wf32o", c);
printf("%wf64X", d);
- scanf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+ scanf("%wf8b", a); // expected-warning{{format specifies type 'signed char' but the argument has type 'int'}}
scanf("%wf16u", b);
scanf("%wf32o", c);
scanf("%wf64X", d);
More information about the llvm-commits mailing list