[compiler-rt] [compiler-rt][nsan] Fix strsep interceptor (PR #106307)

Alexander Shaposhnikov via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 16:27:58 PDT 2024


https://github.com/alexander-shaposhnikov updated https://github.com/llvm/llvm-project/pull/106307

>From 3509d7f94c1b85537552fcc28e34e3b14646b7d7 Mon Sep 17 00:00:00 2001
From: Alexander Shaposhnikov <ashaposhnikov at google.com>
Date: Tue, 27 Aug 2024 23:09:49 +0000
Subject: [PATCH] [compiler-rt][nsan] Fix strsep interceptor

---
 compiler-rt/lib/nsan/nsan_interceptors.cpp | 11 ++++++----
 compiler-rt/test/nsan/intercep_strsep.cpp  | 24 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 compiler-rt/test/nsan/intercep_strsep.cpp

diff --git a/compiler-rt/lib/nsan/nsan_interceptors.cpp b/compiler-rt/lib/nsan/nsan_interceptors.cpp
index d607c8d6a636b8..f3422b92d0aeec 100644
--- a/compiler-rt/lib/nsan/nsan_interceptors.cpp
+++ b/compiler-rt/lib/nsan/nsan_interceptors.cpp
@@ -93,11 +93,14 @@ INTERCEPTOR(char *, strfry, char *s) {
 }
 
 INTERCEPTOR(char *, strsep, char **Stringp, const char *delim) {
+  if (*Stringp == nullptr)
+    return nullptr;
   char *OrigStringp = REAL(strsep)(Stringp, delim);
-  if (Stringp != nullptr) {
-    // The previous character has been overwritten with a '\0' char.
-    __nsan_set_value_unknown(reinterpret_cast<u8 *>(*Stringp) - 1, 1);
-  }
+  // No delimiter: nothing was overwritten, but the whole string is the token.
+  if (*Stringp == nullptr)
+    return OrigStringp;
+  // The byte before *Stringp is the delimiter strsep overwrote with '\0'.
+  __nsan_set_value_unknown(reinterpret_cast<u8 *>(*Stringp) - 1, 1);
   return OrigStringp;
 }
 
diff --git a/compiler-rt/test/nsan/intercep_strsep.cpp b/compiler-rt/test/nsan/intercep_strsep.cpp
new file mode 100644
index 00000000000000..4ae676fe20dcb1
--- /dev/null
+++ b/compiler-rt/test/nsan/intercep_strsep.cpp
@@ -0,0 +1,24 @@
+// RUN: %clangxx_nsan -O2 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <cstring>
+#include <iostream>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+int main() {
+  // Mutable buffer: strsep overwrites each ',' it finds with '\0' in place.
+  char input[] = "apple,banana,cherry,date";
+  char *token;
+  char *rest = input; // Advanced by strsep; set to null after the last token
+
+  // Print each token; the loop stops once strsep returns a null pointer.
+  while ((token = strsep(&rest, ",")) != NULL) {
+    std::cout << token << std::endl;
+  }
+
+  __nsan_dump_shadow_mem(&input[5], 1, 1, 0); // input[5] was the first ','
+  // CHECK: 0x{{[a-f0-9]*}}:    _
+  return 0;
+}



More information about the llvm-commits mailing list