[compiler-rt] [compiler-rt][nsan] Fix strsep interceptor (PR #106307)
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 16:27:58 PDT 2024
https://github.com/alexander-shaposhnikov updated https://github.com/llvm/llvm-project/pull/106307
>From 3509d7f94c1b85537552fcc28e34e3b14646b7d7 Mon Sep 17 00:00:00 2001
From: Alexander Shaposhnikov <ashaposhnikov at google.com>
Date: Tue, 27 Aug 2024 23:09:49 +0000
Subject: [PATCH] [compiler-rt][nsan] Fix strsep interceptor
---
compiler-rt/lib/nsan/nsan_interceptors.cpp | 11 ++++++----
compiler-rt/test/nsan/intercep_strsep.cpp | 24 ++++++++++++++++++++++
2 files changed, 31 insertions(+), 4 deletions(-)
create mode 100644 compiler-rt/test/nsan/intercep_strsep.cpp
diff --git a/compiler-rt/lib/nsan/nsan_interceptors.cpp b/compiler-rt/lib/nsan/nsan_interceptors.cpp
index d607c8d6a636b8..f3422b92d0aeec 100644
--- a/compiler-rt/lib/nsan/nsan_interceptors.cpp
+++ b/compiler-rt/lib/nsan/nsan_interceptors.cpp
@@ -93,11 +93,14 @@ INTERCEPTOR(char *, strfry, char *s) {
}
INTERCEPTOR(char *, strsep, char **Stringp, const char *delim) {
+ if (*Stringp == nullptr) // Nothing left to tokenize.
+ return nullptr;
char *OrigStringp = REAL(strsep)(Stringp, delim);
- if (Stringp != nullptr) {
- // The previous character has been overwritten with a '\0' char.
- __nsan_set_value_unknown(reinterpret_cast<u8 *>(*Stringp) - 1, 1);
- }
+ // No delimiter was found: the token is the entire string, so return it.
+ if (*Stringp == nullptr)
+ return OrigStringp;
+ // The previous character has been overwritten with a '\0' char.
+ __nsan_set_value_unknown(reinterpret_cast<u8 *>(*Stringp) - 1, 1);
return OrigStringp;
}
diff --git a/compiler-rt/test/nsan/intercep_strsep.cpp b/compiler-rt/test/nsan/intercep_strsep.cpp
new file mode 100644
index 00000000000000..4ae676fe20dcb1
--- /dev/null
+++ b/compiler-rt/test/nsan/intercep_strsep.cpp
@@ -0,0 +1,24 @@
+// RUN: %clangxx_nsan -O2 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <cstring>
+#include <iostream>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+ size_t bytes_per_line, size_t reserved);
+
+int main() {
+ // Comma-separated input; strsep overwrites each delimiter it finds in place.
+ char input[] = "apple,banana,cherry,date";
+ char *token;
+ char *rest = input; // Remaining unparsed input; updated by strsep via &rest
+
+ // Print one token per line until strsep returns NULL.
+ while ((token = strsep(&rest, ",")) != NULL) {
+ std::cout << token << std::endl;
+ }
+
+ __nsan_dump_shadow_mem(&input[5], 1, 1, 0); // input[5]: ',' after "apple"
+ // CHECK: 0x{{[a-f0-9]*}}: _
+ return 0;
+}
More information about the llvm-commits
mailing list