[compiler-rt] r348224 - Add new interceptor for regex(3) in NetBSD
Kamil Rytarowski via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 3 17:41:42 PST 2018
Author: kamil
Date: Mon Dec 3 17:41:42 2018
New Revision: 348224
URL: http://llvm.org/viewvc/llvm-project?rev=348224&view=rev
Log:
Add new interceptor for regex(3) in NetBSD
Summary:
Add interceptors for the NetBSD style of regex(3) present inside libc:
- regcomp
- regexec
- regerror
- regfree
- regnsub
- regasub
Add a dedicated test verifying the installed interceptors.
Reviewers: vitalybuka, joerg
Reviewed By: vitalybuka
Subscribers: kubamracek, llvm-commits, mgorny, #sanitizers
Tags: #sanitizers
Differential Revision: https://reviews.llvm.org/D54584
Added:
compiler-rt/trunk/test/sanitizer_common/TestCases/NetBSD/regex.cc
Modified:
compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc
compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_interceptors.h
compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc?rev=348224&r1=348223&r2=348224&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc Mon Dec 3 17:41:42 2018
@@ -7362,6 +7362,93 @@ INTERCEPTOR(int, getvfsstat, void *buf,
#define INIT_GETVFSSTAT
#endif
+#if SANITIZER_INTERCEPT_REGEX
+INTERCEPTOR(int, regcomp, void *preg, const char *pattern, int cflags) { // Wraps regcomp(3): annotates the pattern read and, on success, the preg write.
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, regcomp, preg, pattern, cflags);
+ if (pattern) // No read annotation for a NULL pattern.
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, pattern, REAL(strlen)(pattern) + 1); // Pattern bytes including the terminating NUL.
+ int res = REAL(regcomp)(preg, pattern, cflags);
+ if (!res) // Only a zero result causes preg to be reported as written.
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, preg, struct_regex_sz); // struct_regex_sz is defined as sizeof(regex_t).
+ return res;
+}
+INTERCEPTOR(int, regexec, const void *preg, const char *string, SIZE_T nmatch,
+ struct __sanitizer_regmatch *pmatch[], int eflags) { // NOTE(review): pmatch is declared as an array of pointers, yet the write range below treats it as nmatch regmatch entries — confirm the intended type.
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, regexec, preg, string, nmatch, pmatch, eflags);
+ if (preg)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz); // The whole regex object is reported as read.
+ if (string)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, string, REAL(strlen)(string) + 1); // Subject string including its NUL terminator.
+ int res = REAL(regexec)(preg, string, nmatch, pmatch, eflags);
+ if (!res && pmatch) // Annotate only when the real call returned zero and pmatch is non-NULL.
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pmatch, nmatch * struct_regmatch_sz); // nmatch entries of sizeof(regmatch_t) bytes each.
+ return res;
+}
+INTERCEPTOR(SIZE_T, regerror, int errcode, const void *preg, char *errbuf,
+ SIZE_T errbuf_size) { // Wraps regerror(3): annotates the preg read and the bytes actually stored into errbuf.
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, regerror, errcode, preg, errbuf, errbuf_size);
+ if (preg)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
+ SIZE_T res = REAL(regerror)(errcode, preg, errbuf, errbuf_size);
+ if (errbuf && res > 0) // res is the size needed for the full message and may exceed errbuf_size when the text was truncated.
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errbuf, res < errbuf_size ? res : errbuf_size); // Clamp to the caller's buffer so truncation is not reported as an overflow.
+ return res;
+}
+INTERCEPTOR(void, regfree, const void *preg) { // Wraps regfree(3): annotates the preg read, then forwards to the real function.
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, regfree, preg);
+ if (preg)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz); // Annotated before the real call is made.
+ REAL(regfree)(preg);
+}
+INTERCEPTOR(SSIZE_T, regnsub, char *buf, SIZE_T bufsiz, const char *sub,
+ const struct __sanitizer_regmatch *rm, const char *str) { // Wraps regnsub(3): annotates reads of sub, rm and str, and the write to buf.
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, regnsub, buf, bufsiz, sub, rm, str);
+ if (sub)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, REAL(strlen)(sub) + 1);
+ // The implementation demands and hardcodes 10 elements
+ if (rm)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
+ if (str)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+ SSIZE_T res = REAL(regnsub)(buf, bufsiz, sub, rm, str);
+ if (res > 0 && buf) // Only a positive result is annotated.
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, REAL(strlen)(buf) + 1); // Range is taken from the string now in buf, not from res or bufsiz. NOTE(review): relies on buf being NUL-terminated — confirm for bufsiz == 0.
+ return res;
+}
+INTERCEPTOR(SSIZE_T, regasub, char **buf, const char *sub,
+ const struct __sanitizer_regmatch *rm, const char *sstr) { // Wraps regasub(3): annotates reads of sub, rm and sstr, and the writes to *buf and its string.
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, regasub, buf, sub, rm, sstr);
+ if (sub)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, REAL(strlen)(sub) + 1);
+ // Hardcode 10 elements as this is hardcoded size
+ if (rm)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
+ if (sstr)
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sstr, REAL(strlen)(sstr) + 1);
+ SSIZE_T res = REAL(regasub)(buf, sub, rm, sstr);
+ if (res > 0 && buf) { // Only a positive result is annotated.
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sizeof(char *)); // The pointer slot that receives the result string.
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *buf, REAL(strlen)(*buf) + 1); // The result string itself, including its NUL.
+ }
+ return res;
+}
+#define INIT_REGEX \
+ COMMON_INTERCEPT_FUNCTION(regcomp); \
+ COMMON_INTERCEPT_FUNCTION(regexec); \
+ COMMON_INTERCEPT_FUNCTION(regerror); \
+ COMMON_INTERCEPT_FUNCTION(regfree); \
+ COMMON_INTERCEPT_FUNCTION(regnsub); \
+ COMMON_INTERCEPT_FUNCTION(regasub);
+#else // !SANITIZER_INTERCEPT_REGEX
+#define INIT_REGEX // Expands to nothing when the regex interceptors are not built.
+#endif // SANITIZER_INTERCEPT_REGEX
+
static void InitializeCommonInterceptors() {
static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
interceptor_metadata_map = new((void *)&metadata_mem) MetadataHashMap();
@@ -7619,6 +7706,7 @@ static void InitializeCommonInterceptors
INIT_MI_VECTOR_HASH;
INIT_SETVBUF;
INIT_GETVFSSTAT;
+ INIT_REGEX;
INIT___PRINTF_CHK;
}
Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_interceptors.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_interceptors.h?rev=348224&r1=348223&r2=348224&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_interceptors.h (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_interceptors.h Mon Dec 3 17:41:42 2018
@@ -521,5 +521,6 @@
#define SANITIZER_INTERCEPT_GETMNTINFO SI_NETBSD
#define SANITIZER_INTERCEPT_MI_VECTOR_HASH SI_NETBSD
#define SANITIZER_INTERCEPT_GETVFSSTAT SI_NETBSD
+#define SANITIZER_INTERCEPT_REGEX SI_NETBSD
#endif // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H
Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc?rev=348224&r1=348223&r2=348224&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc Mon Dec 3 17:41:42 2018
@@ -253,6 +253,8 @@ unsigned struct_rlimit_sz = sizeof(struc
unsigned struct_timespec_sz = sizeof(struct timespec);
unsigned struct_sembuf_sz = sizeof(struct sembuf);
unsigned struct_kevent_sz = sizeof(struct kevent);
+unsigned struct_regex_sz = sizeof(regex_t);
+unsigned struct_regmatch_sz = sizeof(regmatch_t);
unsigned struct_utimbuf_sz = sizeof(struct utimbuf);
unsigned struct_itimerspec_sz = sizeof(struct itimerspec);
unsigned struct_timex_sz = sizeof(struct timex);
Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h?rev=348224&r1=348223&r2=348224&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h Mon Dec 3 17:41:42 2018
@@ -61,6 +61,14 @@ extern unsigned struct_sembuf_sz;
extern unsigned struct_kevent_sz;
+extern unsigned struct_regex_sz;
+extern unsigned struct_regmatch_sz;
+
+struct __sanitizer_regmatch { // Type the regex interceptors use for match entries; NOTE(review): presumably laid out like NetBSD's regmatch_t — confirm.
+ OFF_T rm_so; // Same field name as regmatch_t's start offset (the test prints from string + rm_so).
+ OFF_T rm_eo; // Same field name as regmatch_t's end offset (the test uses rm_eo - rm_so as the match length).
+};
+
union __sanitizer_sigval {
int sival_int;
uptr sival_ptr;
Added: compiler-rt/trunk/test/sanitizer_common/TestCases/NetBSD/regex.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/sanitizer_common/TestCases/NetBSD/regex.cc?rev=348224&view=auto
==============================================================================
--- compiler-rt/trunk/test/sanitizer_common/TestCases/NetBSD/regex.cc (added)
+++ compiler-rt/trunk/test/sanitizer_common/TestCases/NetBSD/regex.cc Mon Dec 3 17:41:42 2018
@@ -0,0 +1,101 @@
+// RUN: %clangxx -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void test_matched(const regex_t *preg, const char *string) { // Prints whether string matches preg; aborts on any other regexec result.
+ int rv = regexec(preg, string, 0, NULL, 0); // nmatch 0 with a NULL pmatch: no match offsets are requested.
+ if (!rv)
+ printf("%s: matched\n", string);
+ else if (rv == REG_NOMATCH)
+ printf("%s: not-matched\n", string);
+ else
+ abort(); // Result was neither zero nor REG_NOMATCH.
+}
+
+void test_print_matches(const regex_t *preg, const char *string) { // Runs regexec with a 10-entry match array and prints every entry that was filled in.
+ regmatch_t rm[10];
+ int rv = regexec(preg, string, __arraycount(rm), rm, 0);
+ if (!rv) {
+ for (size_t i = 0; i < __arraycount(rm); i++) {
+ // This condition shall be simplified, but verify that the data fields
+ // are accessible.
+ if (rm[i].rm_so == -1 && rm[i].rm_eo == -1)
+ continue; // Entries with both offsets at -1 are skipped.
+ printf("matched[%zu]='%.*s'\n", i, (int)(rm[i].rm_eo - rm[i].rm_so), // Precision argument is the match length.
+ string + rm[i].rm_so);
+ }
+ } else if (rv == REG_NOMATCH)
+ printf("%s: not-matched\n", string);
+ else
+ abort(); // Result was neither zero nor REG_NOMATCH.
+}
+
+void test_nsub(const regex_t *preg, const char *string) { // Matches string, then prints the regnsub substitution of "\1xyz" into a fixed buffer.
+ regmatch_t rm[10]; // Ten entries: the regnsub interceptor annotates a read of 10 regmatch entries.
+ int rv = regexec(preg, string, __arraycount(rm), rm, 0);
+ if (!rv) {
+ char buf[1024];
+ ssize_t ss = regnsub(buf, __arraycount(buf), "\\1xyz", rm, string);
+ assert(ss != -1); // The test treats -1 as failure.
+
+ printf("'%s' -> '%s'\n", string, buf);
+ } else if (rv == REG_NOMATCH)
+ printf("%s: not-matched\n", string);
+ else
+ abort(); // Result was neither zero nor REG_NOMATCH.
+}
+
+void test_asub(const regex_t *preg, const char *string) { // Matches string, then prints the regasub substitution of "\1xyz" and frees the result.
+ regmatch_t rm[10]; // Ten entries: the regasub interceptor annotates a read of 10 regmatch entries.
+ int rv = regexec(preg, string, __arraycount(rm), rm, 0);
+ if (!rv) {
+ char *buf; // Set by regasub through &buf; released with free() below.
+ ssize_t ss = regasub(&buf, "\\1xyz", rm, string);
+ assert(ss != -1); // The test treats -1 as failure.
+
+ printf("'%s' -> '%s'\n", string, buf);
+ free(buf);
+ } else if (rv == REG_NOMATCH)
+ printf("%s: not-matched\n", string);
+ else
+ abort(); // Result was neither zero nor REG_NOMATCH.
+}
+
+int main(void) { // Calls each intercepted regex(3) function once; FileCheck verifies the output against the CHECK lines below.
+ printf("regex\n");
+
+ regex_t regex;
+ int rv = regcomp(&regex, "[[:upper:]]\\([[:upper:]]\\)", 0); // Two uppercase letters; the second is captured as group 1.
+ assert(!rv);
+
+ test_matched(&regex, "abc");
+ test_matched(&regex, "ABC");
+
+ test_print_matches(&regex, "ABC");
+
+ test_nsub(&regex, "ABC DEF");
+ test_asub(&regex, "GHI JKL");
+
+ regfree(&regex);
+
+ rv = regcomp(&regex, "[[:upp:]]", 0); // Compilation is expected to fail here (asserted below) so regerror has an error to describe.
+ assert(rv);
+
+ char errbuf[1024];
+ regerror(rv, &regex, errbuf, sizeof errbuf);
+ printf("error: %s\n", errbuf);
+
+ // CHECK: regex
+ // CHECK: abc: not-matched
+ // CHECK: ABC: matched
+ // CHECK: matched[0]='AB'
+ // CHECK: matched[1]='B'
+ // CHECK: 'ABC DEF' -> 'Bxyz'
+ // CHECK: 'GHI JKL' -> 'Hxyz'
+ // CHECK: error:{{.*}}
+
+ return 0;
+}
More information about the llvm-commits
mailing list