[compiler-rt] 74f0051 - [DFSAN] Add support for strsep.

Andrew Browne via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 24 13:15:14 PDT 2023


Author: Tomasz Kuchta
Date: 2023-04-24T20:14:28Z
New Revision: 74f00516e5ce79a367acfd1ed1c74fa15aff69c7

URL: https://github.com/llvm/llvm-project/commit/74f00516e5ce79a367acfd1ed1c74fa15aff69c7
DIFF: https://github.com/llvm/llvm-project/commit/74f00516e5ce79a367acfd1ed1c74fa15aff69c7.diff

LOG: [DFSAN] Add support for strsep.

Reviewed-by: browneee

Differential Revision: https://reviews.llvm.org/D141389

Added: 
    

Modified: 
    compiler-rt/lib/dfsan/dfsan_custom.cpp
    compiler-rt/lib/dfsan/done_abilist.txt
    compiler-rt/test/dfsan/custom.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/dfsan/dfsan_custom.cpp b/compiler-rt/lib/dfsan/dfsan_custom.cpp
index 6f41e225d9e8c..8bb5d39ee8f24 100644
--- a/compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -204,6 +204,57 @@ SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strpbrk(
   return const_cast<char *>(ret);
 }
 
+// Custom strsep: returns the next token of *s and labels the returned pointer.
+// The delimiter byte that strsep overwrites with '\0' has its label cleared.
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(
+    char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+    dfsan_label *ret_label) {
+  dfsan_label base_label = dfsan_read_label(s, sizeof(*s));
+  char *base = *s;
+  char *res = strsep(s, delim);
+  // strsep leaves *s non-null only when it replaced a delimiter with '\0';
+  // that byte (at *s - 1) now holds a constant, so drop its label.
+  if (res && *s)
+    dfsan_set_label(0, *s - 1, 1);
+
+  if (flags().strict_data_dependencies) {
+    *ret_label = res ? base_label : 0;
+  } else {
+    // A null result means *s was null on entry, so no string bytes were read.
+    dfsan_label str_label = res ? dfsan_read_label(base, strlen(res) + 1) : 0;
+    *ret_label = dfsan_union(
+        dfsan_union(base_label, str_label),
+        dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
+                    dfsan_union(s_label, delim_label)));
+  }
+
+  return res;
+}
+
+// Origin-tracking variant of __dfsw_strsep: the label wrapper computes
+// *ret_label; this function decides what, if anything, goes in *ret_origin.
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strsep(
+    char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+    dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin delim_origin,
+    dfsan_origin *ret_origin) {
+  dfsan_origin ptr_origin = dfsan_read_origin_of_first_taint(s, sizeof(*s));
+  char *token = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
+
+  if (flags().strict_data_dependencies) {
+    if (token)
+      *ret_origin = ptr_origin;
+    return token;
+  }
+  if (!*ret_label)
+    return token;
+  // Prefer the origin of *s, then the delimiter bytes, then the arguments.
+  dfsan_origin o = ptr_origin;
+  if (!o)
+    o = dfsan_read_origin_of_first_taint(delim, strlen(delim) + 1);
+  *ret_origin = o ? o : (s_label ? s_origin : delim_origin);
+  return token;
+}
+
 static int dfsan_memcmp_bcmp(const void *s1, const void *s2, size_t n,
                              size_t *bytes_read) {
   const char *cs1 = (const char *) s1, *cs2 = (const char *) s2;

diff  --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt
index ff8a37fbf426f..88ec5cf504bae 100644
--- a/compiler-rt/lib/dfsan/done_abilist.txt
+++ b/compiler-rt/lib/dfsan/done_abilist.txt
@@ -283,6 +283,7 @@ fun:strncmp=custom
 fun:strpbrk=custom
 fun:strrchr=custom
 fun:strstr=custom
+fun:strsep=custom
 
 # Functions which take action based on global state, such as running a callback
 # set by a separate function.

diff  --git a/compiler-rt/test/dfsan/custom.cpp b/compiler-rt/test/dfsan/custom.cpp
index 6808cb19dd05a..6dbf0d71c9663 100644
--- a/compiler-rt/test/dfsan/custom.cpp
+++ b/compiler-rt/test/dfsan/custom.cpp
@@ -1630,6 +1630,51 @@ void test_strpbrk() {
 #endif
 }
 
+// Checks the label and origin of strsep's return value over two calls.
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  // taint delim bytes
+  dfsan_set_label(i_label, p_delim, strlen(p_delim));
+  // taint delim pointer
+  dfsan_set_label(j_label, &p_delim, sizeof(p_delim));
+  // taint the string data bytes
+  dfsan_set_label(k_label, s, 5);
+  // taint the string pointer
+  dfsan_set_label(m_label, &p_s, sizeof(p_s));
+
+  char *rv = strsep(&p_s, p_delim);
+  assert(rv == &base[0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label),
+                               dfsan_union(k_label, m_label)));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p_s);
+#endif
+
+  // taint the remaining string's pointer
+  char **pp_s = &p_s;
+  dfsan_set_label(n_label, pp_s, sizeof(*pp_s));
+
+  rv = strsep(pp_s, p_delim);
+  assert(rv == &base[6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, n_label);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), n_label));
+#endif
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, *pp_s);
+  free(s);
+  free(delim);
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, &str1[3], 1);
@@ -2044,6 +2089,7 @@ int main(void) {
   test_strncmp();
   test_strncpy();
   test_strpbrk();
+  test_strsep();
   test_strrchr();
   test_strstr();
   test_strtod();


        


More information about the llvm-commits mailing list