[compiler-rt] r174882 - [sanitizer] Scanf parser improvements.

Evgeniy Stepanov eugeni.stepanov at gmail.com
Mon Feb 11 06:04:24 PST 2013


Author: eugenis
Date: Mon Feb 11 08:04:24 2013
New Revision: 174882

URL: http://llvm.org/viewvc/llvm-project?rev=174882&view=rev
Log:
[sanitizer] Scanf parser improvements.

Handle %a in cases when it is unambiguous.
Handle %m.

Patch by Jakub Jelinek.

Modified:
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc
    compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc?rev=174882&r1=174881&r2=174882&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc Mon Feb 11 08:04:24 2013
@@ -20,11 +20,12 @@
 
 struct ScanfDirective {
   int argIdx;      // argument index, or -1 of not specified ("%n$")
-  bool suppressed; // suppress assignment ("*")
   int fieldWidth;
+  bool suppressed; // suppress assignment ("*")
   bool allocate; // allocate space ("m")
   char lengthModifier[2];
   char convSpecifier;
+  bool maybeGnuMalloc;
 };
 
 static const char *parse_number(const char *p, int *out) {
@@ -121,6 +122,31 @@ static const char *scanf_parse_next(cons
                   // Consume the closing ']'.
       ++p;
     }
+    // This is unfortunately ambiguous between old GNU extension
+    // of %as, %aS and %a[...] and newer POSIX %a followed by
+    // letters s, S or [.
+    if (dir->convSpecifier == 'a' && !dir->lengthModifier[0]) {
+      if (*p == 's' || *p == 'S') {
+        dir->maybeGnuMalloc = true;
+        ++p;
+      } else if (*p == '[') {
+        // Watch for %a[h-j%d]: if % appears inside the [...] range
+        // we have to give up, because we cannot tell whether scanf
+        // will parse it as POSIX %a [h-j %d ] or as a GNU allocated
+        // string with range dh-j plus %.
+        const char *q = p + 1;
+        if (*q == '^')
+          ++q;
+        if (*q == ']')
+          ++q;
+        while (*q && *q != ']' && *q != '%')
+          ++q;
+        if (*q == 0 || *q == '%')
+          return 0;
+        p = q + 1; // Consume the closing ']'.
+        dir->maybeGnuMalloc = true;
+      }
+    }
     break;
   }
   return p;
@@ -133,9 +159,7 @@ static bool scanf_is_integer_conv(char c
 
 // Returns true if the character is an floating point conversion specifier.
 static bool scanf_is_float_conv(char c) {
-  return char_is_one_of(c, "AeEfFgG");
-  // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
-  // unsupported.
+  return char_is_one_of(c, "aAeEfFgG");
 }
 
 // Returns string output character size for string-like conversions,
@@ -170,6 +194,21 @@ enum ScanfStoreSize {
 // Returns the store size of a scanf directive (if >0), or a value of
 // ScanfStoreSize.
 static int scanf_get_store_size(ScanfDirective *dir) {
+  if (dir->allocate) {
+    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
+      return SSS_INVALID;
+    return sizeof(char *);
+  }
+
+  if (dir->maybeGnuMalloc) {
+    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
+      return SSS_INVALID;
+    // This is ambiguous, so check the smaller of the two store sizes:
+    // char * (if it is the GNU extension %as, %aS or %a[...]) or float
+    // (if it is POSIX %a followed by the letters s, S or [).
+    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
+  }
+
   if (scanf_is_integer_conv(dir->convSpecifier)) {
     switch (dir->lengthModifier[0]) {
     case 'h':
@@ -258,11 +297,6 @@ static void scanf_common(void *ctx, cons
     }
     if (dir.suppressed)
       continue;
-    if (dir.allocate) {
-      // Unsupported;
-      continue;
-    }
-
     int size = scanf_get_store_size(&dir);
     if (size == SSS_INVALID)
       break;

Modified: compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc?rev=174882&r1=174881&r2=174882&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc Mon Feb 11 08:04:24 2013
@@ -60,7 +60,9 @@ TEST(SanitizerCommonInterceptors, Scanf)
   const unsigned S = sizeof(short);  // NOLINT
   const unsigned C = sizeof(char);  // NOLINT
   const unsigned D = sizeof(double);  // NOLINT
+  const unsigned LD = sizeof(long double);  // NOLINT
   const unsigned F = sizeof(float);  // NOLINT
+  const unsigned P = sizeof(char*);  // NOLINT
 
   testScanf("%d", 1, I);
   testScanf("%d%d%d", 3, I, I, I);
@@ -102,12 +104,25 @@ TEST(SanitizerCommonInterceptors, Scanf)
   testScanf("%c%d", 2, C, I);
   testScanf("%A%lf", 2, F, D);
 
-  // Unsupported stuff.
+  testScanf("%ms %Lf", 2, P, LD);
+  testScanf("s%Las", 1, LD);
+  testScanf("%ar", 1, F);
+
+  // In the cases with std::min below, the format spec can be interpreted
+  // either as a floating-point conversion or as a (GNU extension)
+  // callee-allocated string. Our conservative implementation reports
+  // whichever of the two possibilities has the smaller store range.
   testScanf("%a[", 0);
-  testScanf("%as", 0);
-  testScanf("%aS", 0);
-  testScanf("%a13S", 0);
-  testScanf("%alS", 0);
+  testScanf("%a[]", 0);
+  testScanf("%a[]]", 1, std::min(F, P));
+  testScanf("%a[abc]", 1, std::min(F, P));
+  testScanf("%a[^abc]", 1, std::min(F, P));
+  testScanf("%a[ab%c] %d", 0);
+  testScanf("%a[^ab%c] %d", 0);
+  testScanf("%as", 1, std::min(F, P));
+  testScanf("%aS", 1, std::min(F, P));
+  testScanf("%a13S", 1, std::min(F, P));
+  testScanf("%alS", 1, std::min(F, P));
 
   testScanf("%5$d", 0);
   testScanf("%md", 0);





More information about the llvm-commits mailing list