[compiler-rt] [compiler-rt][nsan] Fix strsep interceptor (PR #106307)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 16:22:12 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Alexander Shaposhnikov (alexander-shaposhnikov)

<details>
<summary>Changes</summary>

Fix the strsep interceptor.
For a description of strsep, see https://www.man7.org/linux/man-pages/man3/strsep.3.html

---
Full diff: https://github.com/llvm/llvm-project/pull/106307.diff


2 Files Affected:

- (modified) compiler-rt/lib/nsan/nsan_interceptors.cpp (+7-4) 
- (added) compiler-rt/test/nsan/intercep_strsep.cpp (+25) 


``````````diff
diff --git a/compiler-rt/lib/nsan/nsan_interceptors.cpp b/compiler-rt/lib/nsan/nsan_interceptors.cpp
index d607c8d6a636b8..f3422b92d0aeec 100644
--- a/compiler-rt/lib/nsan/nsan_interceptors.cpp
+++ b/compiler-rt/lib/nsan/nsan_interceptors.cpp
@@ -93,11 +93,14 @@ INTERCEPTOR(char *, strfry, char *s) {
 }
 
 INTERCEPTOR(char *, strsep, char **Stringp, const char *delim) {
+  if (*Stringp == nullptr)
+    return nullptr;
   char *OrigStringp = REAL(strsep)(Stringp, delim);
-  if (Stringp != nullptr) {
-    // The previous character has been overwritten with a '\0' char.
-    __nsan_set_value_unknown(reinterpret_cast<u8 *>(*Stringp) - 1, 1);
-  }
+  // No delimiter was found, the token is taken to be the entire string.
+  if (*Stringp == nullptr)
+    return nullptr;
+  // The previous character has been overwritten with a '\0' char.
+  __nsan_set_value_unknown(reinterpret_cast<u8 *>(*Stringp) - 1, 1);
   return OrigStringp;
 }
 
diff --git a/compiler-rt/test/nsan/intercep_strsep.cpp b/compiler-rt/test/nsan/intercep_strsep.cpp
new file mode 100644
index 00000000000000..69accd53c4d3ad
--- /dev/null
+++ b/compiler-rt/test/nsan/intercep_strsep.cpp
@@ -0,0 +1,25 @@
+// RUN: %clangxx_nsan -O2 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <iostream>
+#include <cstring>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+int main() {
+    // Define a C-style string with commas as delimiters
+    char input[] = "apple,banana,cherry,date";
+    char* token;
+    char* rest = input; // Pointer to keep track of the rest of the string
+
+    // Tokenize the string using strsep
+    while ((token = strsep(&rest, ",")) != NULL) {
+        std::cout << token << std::endl;
+    }
+
+    __nsan_dump_shadow_mem(&input[5], 1, 1, 0);
+    // CHECK: 0x{{[a-f0-9]*}}:    _
+    return 0;
+}
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/106307


More information about the llvm-commits mailing list