[PATCH] [sanitizer] Improve scanf interceptor
Alexey Samsonov
samsonov at google.com
Thu Feb 7 07:11:01 PST 2013
================
Comment at: sanitizer_common_interceptors_scanf.inc:29
@@ +28,3 @@
+// Returns next position in the format string.
+// Returns 0 if an error is encountered.
+static const char *scanf_parse_next(const char *p, ScanfDirective *dir) {
----------------
... or there are no more directives.
================
Comment at: sanitizer_common_interceptors_scanf.inc:28
@@ -23,72 +27,3 @@
-// One-letter specs.
-static const ScanfSpec scanf_specs[] = {
- {'s', 1}, // FIXME: This is incorrect, we should check the actual number
- // of bytes written to the string.
- {'c', sizeof(char)},
- {'p', sizeof(void *)},
- {'e', sizeof(float)},
- {'E', sizeof(float)},
- {'a', sizeof(float)},
- {'f', sizeof(float)},
- {'g', sizeof(float)},
- {'d', sizeof(int)},
- {'i', sizeof(int)},
- {'o', sizeof(int)},
- {'u', sizeof(int)},
- {'x', sizeof(int)},
- {'X', sizeof(int)},
- {'n', sizeof(int)},
- {'t', sizeof(PTRDIFF_T)},
- {'z', sizeof(SIZE_T)},
- {'j', sizeof(INTMAX_T)},
- {'h', sizeof(short)}
-};
-
-static const unsigned scanf_specs_cnt =
- sizeof(scanf_specs) / sizeof(scanf_specs[0]);
-
-// %ll?, %L?, %q? specs
-static const ScanfSpec scanf_llspecs[] = {
- {'e', sizeof(long double)},
- {'f', sizeof(long double)},
- {'g', sizeof(long double)},
- {'d', sizeof(long long)},
- {'i', sizeof(long long)},
- {'o', sizeof(long long)},
- {'u', sizeof(long long)},
- {'x', sizeof(long long)}
-};
-
-static const unsigned scanf_llspecs_cnt =
- sizeof(scanf_llspecs) / sizeof(scanf_llspecs[0]);
-
-// %l? specs
-static const ScanfSpec scanf_lspecs[] = {
- {'e', sizeof(double)},
- {'f', sizeof(double)},
- {'g', sizeof(double)},
- {'d', sizeof(long)},
- {'i', sizeof(long)},
- {'o', sizeof(long)},
- {'u', sizeof(long)},
- {'x', sizeof(long)},
- {'X', sizeof(long)},
-};
-
-static const unsigned scanf_lspecs_cnt =
- sizeof(scanf_lspecs) / sizeof(scanf_lspecs[0]);
-
-static unsigned match_spec(const struct ScanfSpec *spec, unsigned n, char c) {
- for (unsigned i = 0; i < n; ++i)
- if (spec[i].c == c)
- return spec[i].size;
- return 0;
-}
-
-static void scanf_common(void *ctx, const char *format, va_list ap_const) {
- va_list aq;
- va_copy(aq, ap_const);
-
- const char *p = format;
- unsigned size;
+// Returns next position in the format string.
+// Returns 0 if an error is encountered.
----------------
Parses the next scanf directive and returns ....
================
Comment at: sanitizer_common_interceptors_scanf.inc:70
@@ +69,3 @@
+ }
+ // Field width.
+ if (*p >= '0' && *p <= '9') {
----------------
Can you hoist parsing field width to a function?
================
Comment at: sanitizer_common_interceptors_scanf.inc:48
@@ -111,1 +47,3 @@
+ }
+ // A number. Either %n$ syntax, or the field width.
if (*p >= '0' && *p <= '9') {
----------------
I think you try to distinguish %n$ case (try to parse a number, check if it's followed by $), and if not, fall back to the general case, parsing stuff in this order:
1) allocation suppression
2) "a" character
3) field width
4) type modifier
5) conv specifier
================
Comment at: sanitizer_common_interceptors_scanf.inc:80
@@ +79,3 @@
+ // m
+ if (*p == 'm') {
+ dir->allocate = true;
----------------
Hm? My "man scanf" tells it should be 'a'
================
Comment at: sanitizer_common_interceptors_scanf.inc:85
@@ +84,3 @@
+ // Length modifier.
+ if (*p == 'j' || *p == 'z' || *p == 't' || *p == 'L' || *p == 'q') {
+ dir->lengthModifier[0] = *p;
----------------
Can you use smth like internal_strchr(*p, "jztLq") here and below?
================
Comment at: sanitizer_common_interceptors_scanf.inc:130
@@ +129,3 @@
+static bool scanf_is_integer_conv(char c) {
+ return c == 'd' ||
+ c == 'i' ||
----------------
strchr here as well
================
Comment at: sanitizer_common_interceptors_scanf.inc:140
@@ +139,3 @@
+static bool scanf_is_float_conv(char c) {
+ return c == 'a' || c == 'A' ||
+ c == 'e' || c == 'E' ||
----------------
and here
================
Comment at: sanitizer_common_interceptors_scanf.inc:143
@@ +142,3 @@
+ c == 'f' || c == 'F' ||
+ c == 'g' || c == 'G';
+}
----------------
my man doesn't know about "A", "F", "G". Is there online page with specs you used? Probably you can give a link in the code.
================
Comment at: sanitizer_common_interceptors_scanf.inc:172
@@ +171,3 @@
+ if (scanf_is_integer_conv(dir->convSpecifier)) {
+ switch (dir->lengthModifier[0]) {
+ case 'h':
----------------
Why did you drop arrays of pairs { ('j', sizeof(INTMAX_T) , ... }?
================
Comment at: sanitizer_common_interceptors_scanf.inc:251
@@ +250,3 @@
+
+ printf("START: %s\n", format);
+
----------------
Remove this
================
Comment at: sanitizer_common_interceptors_scanf.inc:265
@@ +264,3 @@
+
+ printf(" SPEC: field %d, length %c%c, conv %c, argidx %d, %s%s\n",
+ dir.fieldWidth,
----------------
Remove this
http://llvm-reviews.chandlerc.com/D381
More information about the llvm-commits
mailing list