[compiler-rt] r174882 - [sanitizer] Scanf parser improvements.
Evgeniy Stepanov
eugeni.stepanov at gmail.com
Mon Feb 11 06:04:24 PST 2013
Author: eugenis
Date: Mon Feb 11 08:04:24 2013
New Revision: 174882
URL: http://llvm.org/viewvc/llvm-project?rev=174882&view=rev
Log:
[sanitizer] Scanf parser improvements.
Handle %a in cases when it is unambiguous.
Handle %m.
Patch by Jakub Jelinek.
Modified:
compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc
compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc
Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc?rev=174882&r1=174881&r2=174882&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc Mon Feb 11 08:04:24 2013
@@ -20,11 +20,12 @@
struct ScanfDirective {
int argIdx; // argument index, or -1 if not specified ("%n$")
- bool suppressed; // suppress assignment ("*")
int fieldWidth;
+ bool suppressed; // suppress assignment ("*")
bool allocate; // allocate space ("m")
char lengthModifier[2];
char convSpecifier;
+ bool maybeGnuMalloc;
};
static const char *parse_number(const char *p, int *out) {
@@ -121,6 +122,31 @@ static const char *scanf_parse_next(cons
// Consume the closing ']'.
++p;
}
+ // This is unfortunately ambiguous between old GNU extension
+ // of %as, %aS and %a[...] and newer POSIX %a followed by
+ // letters s, S or [.
+ if (dir->convSpecifier == 'a' && !dir->lengthModifier[0]) {
+ if (*p == 's' || *p == 'S') {
+ dir->maybeGnuMalloc = true;
+ ++p;
+ } else if (*p == '[') {
+ // Watch for %a[h-j%d], if % appears in the
+ // [...] range, then we need to give up, we don't know
+ // if scanf will parse it as POSIX %a [h-j %d ] or
+ // GNU allocation of string with range dh-j plus %.
+ const char *q = p + 1;
+ if (*q == '^')
+ ++q;
+ if (*q == ']')
+ ++q;
+ while (*q && *q != ']' && *q != '%')
+ ++q;
+ if (*q == 0 || *q == '%')
+ return 0;
+ p = q + 1; // Consume the closing ']'.
+ dir->maybeGnuMalloc = true;
+ }
+ }
break;
}
return p;
@@ -133,9 +159,7 @@ static bool scanf_is_integer_conv(char c
// Returns true if the character is a floating-point conversion specifier.
static bool scanf_is_float_conv(char c) {
- return char_is_one_of(c, "AeEfFgG");
- // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
- // unsupported.
+ return char_is_one_of(c, "aAeEfFgG");
}
// Returns string output character size for string-like conversions,
@@ -170,6 +194,21 @@ enum ScanfStoreSize {
// Returns the store size of a scanf directive (if >0), or a value of
// ScanfStoreSize.
static int scanf_get_store_size(ScanfDirective *dir) {
+ if (dir->allocate) {
+ if (!char_is_one_of(dir->convSpecifier, "cCsS["))
+ return SSS_INVALID;
+ return sizeof(char *);
+ }
+
+ if (dir->maybeGnuMalloc) {
+ if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
+ return SSS_INVALID;
+ // This is ambiguous, so check the smaller size of char * (if it is
+ // a GNU extension of %as, %aS or %a[...]) and float (if it is
+ // POSIX %a followed by s, S or [ letters).
+ return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
+ }
+
if (scanf_is_integer_conv(dir->convSpecifier)) {
switch (dir->lengthModifier[0]) {
case 'h':
@@ -258,11 +297,6 @@ static void scanf_common(void *ctx, cons
}
if (dir.suppressed)
continue;
- if (dir.allocate) {
- // Unsupported;
- continue;
- }
-
int size = scanf_get_store_size(&dir);
if (size == SSS_INVALID)
break;
Modified: compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc?rev=174882&r1=174881&r2=174882&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc Mon Feb 11 08:04:24 2013
@@ -60,7 +60,9 @@ TEST(SanitizerCommonInterceptors, Scanf)
const unsigned S = sizeof(short); // NOLINT
const unsigned C = sizeof(char); // NOLINT
const unsigned D = sizeof(double); // NOLINT
+ const unsigned LD = sizeof(long double); // NOLINT
const unsigned F = sizeof(float); // NOLINT
+ const unsigned P = sizeof(char*); // NOLINT
testScanf("%d", 1, I);
testScanf("%d%d%d", 3, I, I, I);
@@ -102,12 +104,25 @@ TEST(SanitizerCommonInterceptors, Scanf)
testScanf("%c%d", 2, C, I);
testScanf("%A%lf", 2, F, D);
- // Unsupported stuff.
+ testScanf("%ms %Lf", 2, P, LD);
+ testScanf("s%Las", 1, LD);
+ testScanf("%ar", 1, F);
+
+ // In the cases with std::min below the format spec can be interpreted as
+ // either floating-something, or (GNU extension) callee-allocated string.
+ // Our conservative implementation reports one of the two possibilities with
+ // the least store range.
testScanf("%a[", 0);
- testScanf("%as", 0);
- testScanf("%aS", 0);
- testScanf("%a13S", 0);
- testScanf("%alS", 0);
+ testScanf("%a[]", 0);
+ testScanf("%a[]]", 1, std::min(F, P));
+ testScanf("%a[abc]", 1, std::min(F, P));
+ testScanf("%a[^abc]", 1, std::min(F, P));
+ testScanf("%a[ab%c] %d", 0);
+ testScanf("%a[^ab%c] %d", 0);
+ testScanf("%as", 1, std::min(F, P));
+ testScanf("%aS", 1, std::min(F, P));
+ testScanf("%a13S", 1, std::min(F, P));
+ testScanf("%alS", 1, std::min(F, P));
testScanf("%5$d", 0);
testScanf("%md", 0);
More information about the llvm-commits
mailing list