[compiler-rt] [DFSan] Fix sscanf wrapper handling %*d (the star skips capturing). (PR #67392)

Andrew Browne via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 25 22:20:52 PDT 2023

https://github.com/browneee created https://github.com/llvm/llvm-project/pull/67392

Bug introduced in https://reviews.llvm.org/D153775

From 33d1b30e07eb7f718cddcd898cefc934613248bd Mon Sep 17 00:00:00 2001
From: Andrew Browne <browneee at google.com>
Date: Tue, 26 Sep 2023 02:13:16 +0000
Subject: [PATCH] [DFSan] Fix sscanf wrapper handling %*d (the star skips
 capturing).

Bug introduced in https://reviews.llvm.org/D153775
 compiler-rt/lib/dfsan/dfsan_custom.cpp | 251 ++++++++++++++-----------
 compiler-rt/test/dfsan/custom.cpp      |  49 +++--
 2 files changed, 167 insertions(+), 133 deletions(-)

diff --git a/compiler-rt/lib/dfsan/dfsan_custom.cpp b/compiler-rt/lib/dfsan/dfsan_custom.cpp
index cffd4bcfd8c192d..6cad29f0c76c55e 100644
--- a/compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -2246,7 +2246,8 @@ struct Formatter {
-        num_scanned(-1) {}
+        num_scanned(-1),
+        skip(false) {}
   int format() {
     char *tmp_fmt = build_format_string();
@@ -2347,6 +2348,7 @@ struct Formatter {
   const char *fmt_cur;
   int width;
   int num_scanned;
+  bool skip;
 // Formats the input and propagates the input labels to the output. The output
@@ -2550,8 +2552,8 @@ static int scan_buffer(char *str, size_t size, const char *fmt,
   while (*formatter.fmt_cur) {
     formatter.fmt_start = formatter.fmt_cur;
     formatter.width = -1;
-    int retval = 0;
-    dfsan_label l = 0;
+    formatter.skip = false;
+    int read_count = 0;
     void *dst_ptr = 0;
     size_t write_size = 0;
     if (*formatter.fmt_cur != '%') {
@@ -2560,9 +2562,9 @@ static int scan_buffer(char *str, size_t size, const char *fmt,
       for (; *(formatter.fmt_cur + 1) && *(formatter.fmt_cur + 1) != '%';
            ++formatter.fmt_cur) {
-      retval = formatter.scan();
+      read_count = formatter.scan();
       dfsan_set_label(0, formatter.str_cur(),
-                      formatter.num_written_bytes(retval));
+                      formatter.num_written_bytes(read_count));
     } else {
       // Conversion directive. Consume all the characters until a conversion
       // specifier or the end of the string.
@@ -2575,58 +2577,61 @@ static int scan_buffer(char *str, size_t size, const char *fmt,
         case 'u':
         case 'x':
         case 'X':
-          switch (*(formatter.fmt_cur - 1)) {
+          if (formatter.skip) {
+            read_count = formatter.scan();
+          } else {
+            switch (*(formatter.fmt_cur - 1)) {
             case 'h':
-            // Also covers the 'hh' case (since the size of the arg is still
-            // an int).
-            dst_ptr = va_arg(ap, int *);
-            retval = formatter.scan((int *)dst_ptr);
-            write_size = sizeof(int);
-            break;
+              // Also covers the 'hh' case (since the size of the arg is still
+              // an int).
+              dst_ptr = va_arg(ap, int *);
+              read_count = formatter.scan((int *)dst_ptr);
+              write_size = sizeof(int);
+              break;
             case 'l':
-            if (formatter.fmt_cur - formatter.fmt_start >= 2 &&
-                *(formatter.fmt_cur - 2) == 'l') {
+              if (formatter.fmt_cur - formatter.fmt_start >= 2 &&
+                  *(formatter.fmt_cur - 2) == 'l') {
+                dst_ptr = va_arg(ap, long long int *);
+                read_count = formatter.scan((long long int *)dst_ptr);
+                write_size = sizeof(long long int);
+              } else {
+                dst_ptr = va_arg(ap, long int *);
+                read_count = formatter.scan((long int *)dst_ptr);
+                write_size = sizeof(long int);
+              }
+              break;
+            case 'q':
               dst_ptr = va_arg(ap, long long int *);
-              retval = formatter.scan((long long int *)dst_ptr);
+              read_count = formatter.scan((long long int *)dst_ptr);
               write_size = sizeof(long long int);
-            } else {
-              dst_ptr = va_arg(ap, long int *);
-              retval = formatter.scan((long int *)dst_ptr);
-              write_size = sizeof(long int);
-            }
-            break;
-            case 'q':
-            dst_ptr = va_arg(ap, long long int *);
-            retval = formatter.scan((long long int *)dst_ptr);
-            write_size = sizeof(long long int);
-            break;
+              break;
             case 'j':
-            dst_ptr = va_arg(ap, intmax_t *);
-            retval = formatter.scan((intmax_t *)dst_ptr);
-            write_size = sizeof(intmax_t);
-            break;
+              dst_ptr = va_arg(ap, intmax_t *);
+              read_count = formatter.scan((intmax_t *)dst_ptr);
+              write_size = sizeof(intmax_t);
+              break;
             case 'z':
             case 't':
-            dst_ptr = va_arg(ap, size_t *);
-            retval = formatter.scan((size_t *)dst_ptr);
-            write_size = sizeof(size_t);
-            break;
+              dst_ptr = va_arg(ap, size_t *);
+              read_count = formatter.scan((size_t *)dst_ptr);
+              write_size = sizeof(size_t);
+              break;
-            dst_ptr = va_arg(ap, int *);
-            retval = formatter.scan((int *)dst_ptr);
-            write_size = sizeof(int);
-          }
-          // get the label associated with the string at the corresponding
-          // place
-          l = dfsan_read_label(formatter.str_cur(),
-                               formatter.num_written_bytes(retval));
-          if (str_origin == nullptr)
-            dfsan_set_label(l, dst_ptr, write_size);
-          else {
+              dst_ptr = va_arg(ap, int *);
+              read_count = formatter.scan((int *)dst_ptr);
+              write_size = sizeof(int);
+            }
+            // get the label associated with the string at the corresponding
+            // place
+            dfsan_label l = dfsan_read_label(formatter.str_cur(),
+                                 formatter.num_written_bytes(read_count));
             dfsan_set_label(l, dst_ptr, write_size);
-            size_t scan_count = formatter.num_written_bytes(retval);
-            size_t size = scan_count > write_size ? write_size : scan_count;
-            dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            if (str_origin != nullptr) {
+              dfsan_set_label(l, dst_ptr, write_size);
+              size_t scan_count = formatter.num_written_bytes(read_count);
+              size_t size = scan_count > write_size ? write_size : scan_count;
+              dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            }
           end_fmt = true;
@@ -2640,104 +2645,119 @@ static int scan_buffer(char *str, size_t size, const char *fmt,
         case 'F':
         case 'g':
         case 'G':
-          if (*(formatter.fmt_cur - 1) == 'L') {
-            dst_ptr = va_arg(ap, long double *);
-            retval = formatter.scan((long double *)dst_ptr);
-            write_size = sizeof(long double);
-          } else if (*(formatter.fmt_cur - 1) == 'l') {
-            dst_ptr = va_arg(ap, double *);
-            retval = formatter.scan((double *)dst_ptr);
-            write_size = sizeof(double);
+          if (formatter.skip) {
+            read_count = formatter.scan();
           } else {
-            dst_ptr = va_arg(ap, float *);
-            retval = formatter.scan((float *)dst_ptr);
-            write_size = sizeof(float);
-          }
-          l = dfsan_read_label(formatter.str_cur(),
-                               formatter.num_written_bytes(retval));
-          if (str_origin == nullptr)
-            dfsan_set_label(l, dst_ptr, write_size);
-          else {
+            if (*(formatter.fmt_cur - 1) == 'L') {
+              dst_ptr = va_arg(ap, long double *);
+              read_count = formatter.scan((long double *)dst_ptr);
+              write_size = sizeof(long double);
+            } else if (*(formatter.fmt_cur - 1) == 'l') {
+              dst_ptr = va_arg(ap, double *);
+              read_count = formatter.scan((double *)dst_ptr);
+              write_size = sizeof(double);
+            } else {
+              dst_ptr = va_arg(ap, float *);
+              read_count = formatter.scan((float *)dst_ptr);
+              write_size = sizeof(float);
+            }
+            dfsan_label l = dfsan_read_label(formatter.str_cur(),
+                                 formatter.num_written_bytes(read_count));
             dfsan_set_label(l, dst_ptr, write_size);
-            size_t scan_count = formatter.num_written_bytes(retval);
-            size_t size = scan_count > write_size ? write_size : scan_count;
-            dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            if (str_origin != nullptr) {
+              dfsan_set_label(l, dst_ptr, write_size);
+              size_t scan_count = formatter.num_written_bytes(read_count);
+              size_t size = scan_count > write_size ? write_size : scan_count;
+              dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            }
           end_fmt = true;
         case 'c':
-          dst_ptr = va_arg(ap, char *);
-          retval = formatter.scan((char *)dst_ptr);
-          write_size = sizeof(char);
-          l = dfsan_read_label(formatter.str_cur(),
-                               formatter.num_written_bytes(retval));
-          if (str_origin == nullptr)
-            dfsan_set_label(l, dst_ptr, write_size);
-          else {
+          if (formatter.skip) {
+            read_count = formatter.scan();
+          } else {
+            dst_ptr = va_arg(ap, char *);
+            read_count = formatter.scan((char *)dst_ptr);
+            write_size = sizeof(char);
+            dfsan_label l = dfsan_read_label(formatter.str_cur(),
+                                 formatter.num_written_bytes(read_count));
             dfsan_set_label(l, dst_ptr, write_size);
-            size_t scan_count = formatter.num_written_bytes(retval);
-            size_t size = scan_count > write_size ? write_size : scan_count;
-            dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            if (str_origin != nullptr) {
+              size_t scan_count = formatter.num_written_bytes(read_count);
+              size_t size = scan_count > write_size ? write_size : scan_count;
+              dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            }
           end_fmt = true;
         case 's': {
-          dst_ptr = va_arg(ap, char *);
-          retval = formatter.scan((char *)dst_ptr);
-          if (1 == retval) {
-            // special case: we have parsed a single string and we need to
-            // update retval with the string size
-            retval = strlen((char *)dst_ptr);
+          if (formatter.skip) {
+            read_count = formatter.scan();
+          } else {
+            dst_ptr = va_arg(ap, char *);
+            read_count = formatter.scan((char *)dst_ptr);
+            if (1 == read_count) {
+              // special case: we have parsed a single string and we need to
+              // update read_count with the string size
+              read_count = strlen((char *)dst_ptr);
+            }
+            if (str_origin)
+              dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(),
+                                        formatter.num_written_bytes(read_count));
+            va_labels++;
+            dfsan_mem_shadow_transfer(dst_ptr, formatter.str_cur(),
+                                      formatter.num_written_bytes(read_count));
-          if (str_origin)
-            dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(),
-                                      formatter.num_written_bytes(retval));
-          va_labels++;
-          dfsan_mem_shadow_transfer(dst_ptr, formatter.str_cur(),
-                                    formatter.num_written_bytes(retval));
           end_fmt = true;
         case 'p':
-          dst_ptr = va_arg(ap, void *);
-          retval =
-              formatter.scan((int *)dst_ptr);  // note: changing void* to int*
-                                               // since we need to call sizeof
-          write_size = sizeof(int);
-          l = dfsan_read_label(formatter.str_cur(),
-                               formatter.num_written_bytes(retval));
-          if (str_origin == nullptr)
-            dfsan_set_label(l, dst_ptr, write_size);
-          else {
+          if (formatter.skip) {
+            read_count = formatter.scan();
+          } else {
+            dst_ptr = va_arg(ap, void *);
+            read_count =
+                formatter.scan((int *)dst_ptr);  // note: changing void* to int*
+                                                 // since we need to call sizeof
+            write_size = sizeof(int);
+            dfsan_label l = dfsan_read_label(formatter.str_cur(),
+                                 formatter.num_written_bytes(read_count));
             dfsan_set_label(l, dst_ptr, write_size);
-            size_t scan_count = formatter.num_written_bytes(retval);
-            size_t size = scan_count > write_size ? write_size : scan_count;
-            dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            if (str_origin != nullptr) {
+              dfsan_set_label(l, dst_ptr, write_size);
+              size_t scan_count = formatter.num_written_bytes(read_count);
+              size_t size = scan_count > write_size ? write_size : scan_count;
+              dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size);
+            }
           end_fmt = true;
         case 'n': {
-          int *ptr = va_arg(ap, int *);
-          *ptr = (int)formatter.str_off;
-          va_labels++;
-          dfsan_set_label(0, ptr, sizeof(*ptr));
+          if (!formatter.skip) {
+            int *ptr = va_arg(ap, int *);
+            *ptr = (int)formatter.str_off;
+            *va_labels++ = 0;
+            dfsan_set_label(0, ptr, sizeof(*ptr));
+            if (str_origin != nullptr)
+              *str_origin++ = 0;
+          }
           end_fmt = true;
         case '%':
-          retval = formatter.scan();
+          read_count = formatter.scan();
           end_fmt = true;
         case '*':
-          formatter.width = va_arg(ap, int);
-          va_labels++;
+          formatter.skip = true;
@@ -2746,12 +2766,13 @@ static int scan_buffer(char *str, size_t size, const char *fmt,
-    if (retval < 0) {
-      return retval;
+    if (read_count < 0) {
+      // There was an error.
+      return read_count;
-    formatter.str_off += retval;
+    formatter.str_off += read_count;
   (void)va_labels; // Silence unused-but-set-parameter warning
diff --git a/compiler-rt/test/dfsan/custom.cpp b/compiler-rt/test/dfsan/custom.cpp
index dfc24ee3019efb9..c8836707fc91f5a 100644
--- a/compiler-rt/test/dfsan/custom.cpp
+++ b/compiler-rt/test/dfsan/custom.cpp
@@ -2101,18 +2101,24 @@ void test_sscanf_chunk(T expected, const char *format, char *input,
   char padded_input[512];
   strcpy(padded_input, "foo ");
   strcat(padded_input, input);
+  strcpy(padded_input, "@");
+  strcat(padded_input, input);
   strcat(padded_input, " bar");
   char padded_format[512];
   strcpy(padded_format, "foo ");
+  // Swap the first '%' for '%*' so this input is skipped.
+  strcpy(padded_format, "%*");
+  strcat(padded_format, format + 1);
+  strcpy(padded_format, "@");
   strcat(padded_format, format);
   strcat(padded_format, " bar");
   char *s = padded_input + 4;
   T arg;
   memset(&arg, 0, sizeof(arg));
-  dfsan_set_label(i_label, (void *)(s), strlen(input));
-  dfsan_set_label(j_label, (void *)(padded_format + 4), strlen(format));
+  dfsan_set_label(i_label, (void *)(padded_input), strlen(padded_input));
+  dfsan_set_label(j_label, (void *)(padded_format), strlen(padded_format));
   dfsan_origin a_o = dfsan_get_origin((long)(*s));
@@ -2150,7 +2156,7 @@ void test_sscanf() {
   // Test formatting & label propagation (multiple conversion specifiers): %s,
   // %d, %n, %f, and %%.
   int n;
-  strcpy(buf, "hello world, 2014/8/27 12345.678123 % 1000");
+  strcpy(buf, "hello world, 42 2014/8/31 12345.678123 % 1000");
   char *s = buf + 6; //starts with world
   int y = 0;
   int m = 0;
@@ -2159,41 +2165,48 @@ void test_sscanf() {
   int val = 0;
   dfsan_set_label(k_label, (void *)(s + 1), 2); // buf[7]-b[9]
   dfsan_origin s_o = dfsan_get_origin((long)(s[1]));
-  dfsan_set_label(i_label, (void *)(s + 12), 1);
-  dfsan_origin m_o = dfsan_get_origin((long)s[12]); // buf[18]
-  dfsan_set_label(j_label, (void *)(s + 14), 2);    // buf[20]
-  dfsan_origin d_o = dfsan_get_origin((long)s[14]);
-  dfsan_set_label(m_label, (void *)(s + 18), 4); //buf[24]
-  dfsan_origin f_o = dfsan_get_origin((long)s[18]);
+  assert(s[10] == '2');
+  dfsan_set_label(i_label, (void *)(s + 10), 4); // 2014
+  dfsan_origin y_o = dfsan_get_origin((long)s[10]); // buf[16]
+  assert(s[17] == '3');
+  dfsan_set_label(j_label, (void *)(s + 17), 2); // 31
+  dfsan_origin d_o = dfsan_get_origin((long)s[17]); // buf[23]
+  assert(s[20] == '1');
+  dfsan_set_label(m_label, (void *)(s + 20), 5); // 12345
+  dfsan_origin f_o = dfsan_get_origin((long)s[20]); //buf[26]
-  (void)m_o;
+  (void)y_o;
   assert(s_o != 0);
-  assert(m_o != 0);
+  assert(y_o != 0);
   assert(d_o != 0);
   assert(f_o != 0);
-  int r = sscanf(buf, "hello %s %d/%d/%d %f %% %n%d", buf_out, &y, &m, &d,
+  int r = sscanf(buf, "hello %s %*d %d/%d/%d %f %% %n%d", buf_out, &y, &m, &d,
                  &fval, &n, &val);
   assert(r == 6);
   assert(strcmp(buf_out, "world,") == 0);
+  assert(y == 2014);
+  assert(m == 8);
+  assert(d == 31);
+  assert(fval > 12300.0f);
+  assert(fval < 12400.0f);
   ASSERT_READ_LABEL(buf_out, 1, 0);
   ASSERT_READ_LABEL(buf_out + 1, 2, k_label);
   ASSERT_INIT_ORIGINS(buf_out + 1, 2, s_o);
-  ASSERT_READ_LABEL(buf + 9, 9, 0);
-  ASSERT_READ_LABEL(&m, 1, i_label);
-  ASSERT_INIT_ORIGINS(&m, 1, m_o);
-  ASSERT_READ_LABEL(&d, 4, j_label);
-  ASSERT_INIT_ORIGINS(&d, 2, d_o);
+  ASSERT_READ_LABEL(&y, sizeof(y), i_label);
+  ASSERT_INIT_ORIGINS(&y, sizeof(y), y_o);
+  ASSERT_READ_LABEL(&d, sizeof(d), j_label);
+  ASSERT_INIT_ORIGINS(&d, sizeof(d), d_o);
   ASSERT_READ_LABEL(&fval, sizeof(fval), m_label);
   ASSERT_INIT_ORIGINS(&fval, sizeof(fval), f_o);
   ASSERT_READ_LABEL(&val, 4, 0);
   ASSERT_LABEL(r, 0);
-  assert(n == 38);
+  assert(n == 41);
   assert(val == 1000);
   // Test formatting & label propagation (single conversion specifier, with

More information about the llvm-commits mailing list