[compiler-rt] r326008 - Correct ctype(3) functions with NLS on NetBSD

Kamil Rytarowski via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 23 17:01:14 PST 2018


Author: kamil
Date: Fri Feb 23 17:01:14 2018
New Revision: 326008

URL: http://llvm.org/viewvc/llvm-project?rev=326008&view=rev
Log:
Correct ctype(3) functions with NLS on NetBSD

Summary:
The setlocale(3) function reloads the ctype(3) arrays from
external files. This happens behind the scenes in the internals
of libc (citrus library, runes functions, etc.).

ctype(3) functions like isspace(3) can be provided in two
variants on NetBSD: inlined, or via a global symbol in libc:

```
#if defined(_NETBSD_SOURCE) && !defined(_CTYPE_NOINLINE) && \
    !defined(__cplusplus)
#include <sys/ctype_inline.h>
#else
#include <sys/ctype_bits.h>
#endif
```

The inlined versions are de facto array lookup operations.

```
#define isspace(c)      ((int)((_ctype_tab_ + 1)[(c)] & _CTYPE_S))
```

After calling setlocale(3), the ctype(3) arrays (_ctype_tab_,
_toupper_tab_, _tolower_tab_) are reloaded behind the scenes,
and they must be marked as initialized.

Set them initialized inside the common setlocale(3) interceptor.

Each array has 257 elements: 256 for the values 0..255 plus 1 for EOF.

This corrects errors on NetBSD/amd64 in applications
prebuilt with MSan.

Sponsored by <The NetBSD Foundation>

Reviewers: vitalybuka, dvyukov, joerg

Reviewed By: vitalybuka

Subscribers: llvm-commits, kubamracek, #sanitizers

Tags: #sanitizers

Differential Revision: https://reviews.llvm.org/D42020

Added:
    compiler-rt/trunk/test/sanitizer_common/TestCases/ctype.c
Modified:
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc?rev=326008&r1=326007&r2=326008&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common_interceptors.inc Fri Feb 23 17:01:14 2018
@@ -110,6 +110,9 @@
 #define times __times13
 #define wait3 __wait350
 #define wait4 __wait450
+extern const unsigned short *_ctype_tab_;
+extern const short *_toupper_tab_;
+extern const short *_tolower_tab_;
 #endif
 
 // Platform-specific options.
@@ -3186,13 +3189,25 @@ INTERCEPTOR(uptr, ptrace, int request, i
 #endif
 
 #if SANITIZER_INTERCEPT_SETLOCALE
+static void unpoison_ctype_arrays(void *ctx) {
+#if SANITIZER_NETBSD
+  // These arrays contain 256 regular elements in unsigned char range + 1 EOF
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _ctype_tab_, 257 * sizeof(short));
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _toupper_tab_, 257 * sizeof(short));
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _tolower_tab_, 257 * sizeof(short));
+#endif
+}
+
 INTERCEPTOR(char *, setlocale, int category, char *locale) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, setlocale, category, locale);
   if (locale)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, locale, REAL(strlen)(locale) + 1);
   char *res = REAL(setlocale)(category, locale);
-  if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  if (res) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+    unpoison_ctype_arrays(ctx);
+  }
   return res;
 }
 

Added: compiler-rt/trunk/test/sanitizer_common/TestCases/ctype.c
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/sanitizer_common/TestCases/ctype.c?rev=326008&view=auto
==============================================================================
--- compiler-rt/trunk/test/sanitizer_common/TestCases/ctype.c (added)
+++ compiler-rt/trunk/test/sanitizer_common/TestCases/ctype.c Fri Feb 23 17:01:14 2018
@@ -0,0 +1,89 @@
+// RUN: %clang %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <ctype.h>
+#include <limits.h>
+#include <locale.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void check_ctype(void) {
+  unsigned char c;
+  volatile size_t i = 0; /* a dummy variable to prevent optimizing code out */
+
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isalpha(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isascii(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isblank(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!iscntrl(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isdigit(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isgraph(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!islower(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isprint(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!ispunct(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isspace(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isupper(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isxdigit(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!isalnum(c);
+
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!tolower(c);
+  for (c = 0; c < UCHAR_MAX; c++)
+    i += !!toupper(c);
+
+  i += !!isalpha(EOF);
+  i += !!isascii(EOF);
+  i += !!isblank(EOF);
+  i += !!iscntrl(EOF);
+  i += !!isdigit(EOF);
+  i += !!isgraph(EOF);
+  i += !!islower(EOF);
+  i += !!isprint(EOF);
+  i += !!ispunct(EOF);
+  i += !!isspace(EOF);
+  i += !!isupper(EOF);
+  i += !!isxdigit(EOF);
+  i += !!isalnum(EOF);
+
+  i += !!tolower(EOF);
+  i += !!toupper(EOF);
+
+  if (i)
+    return;
+  else
+    return;
+}
+
+int main(int argc, char **argv) {
+  check_ctype();
+
+  setlocale(LC_ALL, "");
+
+  check_ctype();
+
+  setlocale(LC_ALL, "en_US.UTF-8");
+
+  check_ctype();
+
+  setlocale(LC_CTYPE, "pl_PL.UTF-8");
+
+  check_ctype();
+
+  printf("OK\n");
+
+  // CHECK: OK
+
+  return 0;
+}




More information about the llvm-commits mailing list