[compiler-rt] r207443 - [Sanitizer] Add rudimentary support for wide-character strings to scanf/printf interceptors

Alexey Samsonov samsonov at google.com
Mon Apr 28 13:40:25 PDT 2014


Author: samsonov
Date: Mon Apr 28 15:40:25 2014
New Revision: 207443

URL: http://llvm.org/viewvc/llvm-project?rev=207443&view=rev
Log:
[Sanitizer] Add rudimentary support for wide-character strings to scanf/printf interceptors

Modified:
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
    compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_format.inc?rev=207443&r1=207442&r2=207443&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_format.inc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors_format.inc Mon Apr 28 15:40:25 2014
@@ -80,24 +80,23 @@ static bool format_is_float_conv(char c)
 static int format_get_char_size(char convSpecifier,
                                 const char lengthModifier[2]) {
   if (char_is_one_of(convSpecifier, "CS")) {
-    // wchar_t
-    return 0;
+    return sizeof(wchar_t);
   }
 
   if (char_is_one_of(convSpecifier, "cs[")) {
-    if (lengthModifier[0] == 'l')
-      // wchar_t
-      return 0;
-    else if (lengthModifier[0] == 0)
+    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
+      return sizeof(wchar_t);
+    else if (lengthModifier[0] == '\0')
       return sizeof(char);
-    else
-      return 0;
   }
 
   return 0;
 }
 
 enum FormatStoreSize {
+  // Store size not known in advance; can be calculated as wcslen() of the
+  // destination buffer.
+  FSS_WCSLEN = -2,
   // Store size not known in advance; can be calculated as strlen() of the
   // destination buffer.
   FSS_STRLEN = -1,
@@ -108,7 +107,7 @@ enum FormatStoreSize {
 // Returns the memory size of a format directive (if >0), or a value of
 // FormatStoreSize.
 static int format_get_value_size(char convSpecifier,
-                                 const char lengthModifier[2], int fieldWidth,
+                                 const char lengthModifier[2],
                                  bool promote_float) {
   if (format_is_integer_conv(convSpecifier)) {
     switch (lengthModifier[0]) {
@@ -149,15 +148,6 @@ static int format_get_value_size(char co
     }
   }
 
-  if (char_is_one_of(convSpecifier, "cC")) {
-    unsigned charSize = format_get_char_size(convSpecifier, lengthModifier);
-    if (charSize == 0)
-      return FSS_INVALID;
-    if (fieldWidth == 0)
-      return charSize;
-    return fieldWidth * charSize;
-  }
-
   if (convSpecifier == 'p') {
     if (lengthModifier[0] != 0)
       return FSS_INVALID;
@@ -288,18 +278,21 @@ static int scanf_get_value_size(ScanfDir
     return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
   }
 
-  if (char_is_one_of(dir->convSpecifier, "sS[")) {
-    unsigned charSize = format_get_char_size(dir->convSpecifier,
-                                             dir->lengthModifier);
+  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
+    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
+    unsigned charSize =
+        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
     if (charSize == 0)
       return FSS_INVALID;
-    if (dir->fieldWidth == 0)
-      return FSS_STRLEN;
-    return (dir->fieldWidth + 1) * charSize;
+    if (dir->fieldWidth == 0) {
+      if (!needsTerminator)
+        return charSize;
+      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
+    }
+    return (dir->fieldWidth + needsTerminator) * charSize;
   }
 
-  return format_get_value_size(dir->convSpecifier, dir->lengthModifier,
-                               dir->fieldWidth, false);
+  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
 }
 
 // Common part of *scanf interceptors.
@@ -342,6 +335,9 @@ static void scanf_common(void *ctx, int
       break;
     if (size == FSS_STRLEN) {
       size = internal_strlen((const char *)argp) + 1;
+    } else if (size == FSS_WCSLEN) {
+      // FIXME: actually use wcslen() to calculate it.
+      size = 0;
     }
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
   }
@@ -442,16 +438,18 @@ static int printf_get_value_size(PrintfD
     return sizeof(char *);
   }
 
-  if (char_is_one_of(dir->convSpecifier, "sS")) {
-    unsigned charSize = format_get_char_size(dir->convSpecifier,
-                                             dir->lengthModifier);
+  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
+    unsigned charSize =
+        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
     if (charSize == 0)
       return FSS_INVALID;
-    return FSS_STRLEN;
+    if (char_is_one_of(dir->convSpecifier, "sS")) {
+      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
+    }
+    return charSize;
   }
 
-  return format_get_value_size(dir->convSpecifier, dir->lengthModifier,
-                               dir->fieldWidth, true);
+  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
 }
 
 #define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
@@ -542,6 +540,12 @@ static void printf_common(void *ctx, con
         }
         COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
       }
+    } else if (size == FSS_WCSLEN) {
+      if (void *argp = va_arg(aq, void *)) {
+        // FIXME: Properly support wide-character strings (via wcsrtombs).
+        size = 0;
+        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
+      }
     } else {
       // Skip non-pointer args
       SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);

Modified: compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc?rev=207443&r1=207442&r2=207443&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc Mon Apr 28 15:40:25 2014
@@ -40,6 +40,7 @@ static const unsigned L = sizeof(long);
 static const unsigned LL = sizeof(long long);
 static const unsigned S = sizeof(short);
 static const unsigned C = sizeof(char);
+static const unsigned LC = sizeof(wchar_t);
 static const unsigned D = sizeof(double);
 static const unsigned LD = sizeof(long double);
 static const unsigned F = sizeof(float);
@@ -114,6 +115,7 @@ TEST(SanitizerCommonInterceptors, Scanf)
   testScanf("%qd", 1, LL);
   testScanf("a %hd%hhx", 2, S, C);
   testScanf("%c", 1, C);
+  testScanf("%lc", 1, LC);
 
   testScanf("%%", 0);
   testScanf("a%%", 0);
@@ -129,6 +131,8 @@ TEST(SanitizerCommonInterceptors, Scanf)
 
   testScanf("%10s", 1, 11);
   testScanf("%10c", 1, 10);
+  testScanf("%10ls", 1, 11 * LC);
+  testScanf("%10lc", 1, 10 * LC);
   testScanf("%%10s", 0);
   testScanf("%*10s", 0);
   testScanf("%*d", 0);
@@ -248,4 +252,7 @@ TEST(SanitizerCommonInterceptors, Printf
 
   // Dynamic precision for strings is not implemented yet.
   testPrintf("%.*s", 1, 0);
+
+  // Checks for wide-character strings are not implemented yet.
+  testPrintf("%ls", 1, 0);
 }





More information about the llvm-commits mailing list