[PATCH] [sanitizer] Improve scanf interceptor

Alexey Samsonov samsonov at google.com
Thu Feb 7 07:11:01 PST 2013



================
Comment at: sanitizer_common_interceptors_scanf.inc:29
@@ +28,3 @@
+// Returns next position in the format string.
+// Returns 0 if an error is encountered.
+static const char *scanf_parse_next(const char *p, ScanfDirective *dir) {
----------------
... or there are no more directives.

================
Comment at: sanitizer_common_interceptors_scanf.inc:28
@@ -23,72 +27,3 @@
 
-// One-letter specs.
-static const ScanfSpec scanf_specs[] = {
-  {'s', 1},  // FIXME: This is incorrect, we should check the actual number
-             // of bytes written to the string.
-  {'c', sizeof(char)},
-  {'p', sizeof(void *)},
-  {'e', sizeof(float)},
-  {'E', sizeof(float)},
-  {'a', sizeof(float)},
-  {'f', sizeof(float)},
-  {'g', sizeof(float)},
-  {'d', sizeof(int)},
-  {'i', sizeof(int)},
-  {'o', sizeof(int)},
-  {'u', sizeof(int)},
-  {'x', sizeof(int)},
-  {'X', sizeof(int)},
-  {'n', sizeof(int)},
-  {'t', sizeof(PTRDIFF_T)},
-  {'z', sizeof(SIZE_T)},
-  {'j', sizeof(INTMAX_T)},
-  {'h', sizeof(short)}
-};
-
-static const unsigned scanf_specs_cnt =
-  sizeof(scanf_specs) / sizeof(scanf_specs[0]);
-
-// %ll?, %L?, %q? specs
-static const ScanfSpec scanf_llspecs[] = {
-  {'e', sizeof(long double)},
-  {'f', sizeof(long double)},
-  {'g', sizeof(long double)},
-  {'d', sizeof(long long)},
-  {'i', sizeof(long long)},
-  {'o', sizeof(long long)},
-  {'u', sizeof(long long)},
-  {'x', sizeof(long long)}
-};
-
-static const unsigned scanf_llspecs_cnt =
-  sizeof(scanf_llspecs) / sizeof(scanf_llspecs[0]);
-
-// %l? specs
-static const ScanfSpec scanf_lspecs[] = {
-  {'e', sizeof(double)},
-  {'f', sizeof(double)},
-  {'g', sizeof(double)},
-  {'d', sizeof(long)},
-  {'i', sizeof(long)},
-  {'o', sizeof(long)},
-  {'u', sizeof(long)},
-  {'x', sizeof(long)},
-  {'X', sizeof(long)},
-};
-
-static const unsigned scanf_lspecs_cnt =
-  sizeof(scanf_lspecs) / sizeof(scanf_lspecs[0]);
-
-static unsigned match_spec(const struct ScanfSpec *spec, unsigned n, char c) {
-  for (unsigned i = 0; i < n; ++i)
-    if (spec[i].c == c)
-      return spec[i].size;
-  return 0;
-}
-
-static void scanf_common(void *ctx, const char *format, va_list ap_const) {
-  va_list aq;
-  va_copy(aq, ap_const);
-
-  const char *p = format;
-  unsigned size;
+// Returns next position in the format string.
+// Returns 0 if an error is encountered.
----------------
Parses the next scanf directive and returns ....

================
Comment at: sanitizer_common_interceptors_scanf.inc:70
@@ +69,3 @@
+      }
+      // Field width.
+      if (*p >= '0' && *p <= '9') {
----------------
Can you hoist parsing field width to a function?

================
Comment at: sanitizer_common_interceptors_scanf.inc:48
@@ -111,1 +47,3 @@
+    }
+    // A number. Either %n$ syntax, or the field width.
     if (*p >= '0' && *p <= '9') {
----------------
I think you try to distinguish %n$ case (try to parse a number, check if it's followed by $), and if not, fall back to the general case, parsing stuff in this order:
1) allocation suppression
2) "a" character
3) field width
4) type modifier
5) conv specifier

================
Comment at: sanitizer_common_interceptors_scanf.inc:80
@@ +79,3 @@
+    // m
+    if (*p == 'm') {
+      dir->allocate = true;
----------------
Hm? My "man scanf" tells it should be 'a'

================
Comment at: sanitizer_common_interceptors_scanf.inc:85
@@ +84,3 @@
+    // Length modifier.
+    if (*p == 'j' || *p == 'z' || *p == 't' || *p == 'L' || *p == 'q') {
+      dir->lengthModifier[0] = *p;
----------------
Can you use smth like internal_strchr(*p, "jztLq") here and below?

================
Comment at: sanitizer_common_interceptors_scanf.inc:130
@@ +129,3 @@
+static bool scanf_is_integer_conv(char c) {
+  return c == 'd' ||
+    c == 'i' ||
----------------
strchr here as well

================
Comment at: sanitizer_common_interceptors_scanf.inc:140
@@ +139,3 @@
+static bool scanf_is_float_conv(char c) {
+  return c == 'a' || c == 'A' ||
+    c == 'e' || c == 'E' ||
----------------
and here

================
Comment at: sanitizer_common_interceptors_scanf.inc:143
@@ +142,3 @@
+    c == 'f' || c == 'F' ||
+    c == 'g' || c == 'G';
+}
----------------
my man doesn't know about "A", "F", "G". Is there online page with specs you used? Probably you can give a link in the code.

================
Comment at: sanitizer_common_interceptors_scanf.inc:172
@@ +171,3 @@
+  if (scanf_is_integer_conv(dir->convSpecifier)) {
+    switch (dir->lengthModifier[0]) {
+    case 'h':
----------------
Why did you drop arrays of pairs { ('j', sizeof(INTMAX_T) , ... }?

================
Comment at: sanitizer_common_interceptors_scanf.inc:251
@@ +250,3 @@
+
+  printf("START: %s\n", format);
+
----------------
Remove this

================
Comment at: sanitizer_common_interceptors_scanf.inc:265
@@ +264,3 @@
+
+    printf("    SPEC: field %d, length %c%c, conv %c, argidx %d, %s%s\n",
+        dir.fieldWidth,
----------------
Remove this


http://llvm-reviews.chandlerc.com/D381



More information about the llvm-commits mailing list