[clang] [compiler-rt] [ubsan] Display correct runtime messages for negative _BitInt (PR #93612)

Thu May 30 15:50:24 PDT 2024

=?utf-8?q?Eänolituri_Lómitaurë?Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/93612 at github.com>


https://github.com/earnol updated https://github.com/llvm/llvm-project/pull/93612

>From 1d58ee62ddf4ebf4f8f21b47e7240d3df9ef4ea3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?E=C3=A4nolituri=20L=C3=B3mitaur=C3=AB?=
 <eanoliture at gmail.com>
Date: Fri, 24 May 2024 11:39:35 -0400
Subject: [PATCH 1/2] [ubsan] Display correct runtime messages for negative
 _BitInt

Without this patch compiler-rt ubsan library has a bug displaying
incorrect values for variables of the _BitInt (previousely called
_ExtInt) type. This patch affects affects both: generation of metadata
inside code generator and runtime part. The runtime part provided only
for i386 and x86_64 runtimes. Other runtimes should be updated to take
full benefit of this patch.
The patch is constructed the way to be backward compatible and
int and float type runtime diagnostics should be unaffected for not
yet apdated runtimes.
---
 clang/lib/CodeGen/CGExpr.cpp                  |  56 +++++-
 compiler-rt/lib/ubsan/ubsan_value.cpp         |  17 +-
 compiler-rt/lib/ubsan/ubsan_value.h           |  35 +++-
 .../test/ubsan/TestCases/Integer/bit-int.c    | 188 ++++++++++++++++++
 4 files changed, 283 insertions(+), 13 deletions(-)
 create mode 100644 compiler-rt/test/ubsan/TestCases/Integer/bit-int.c

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d6478cc6835d8..e80bdfd51ceec 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -64,6 +65,20 @@ static llvm::cl::opt<bool> ClSanitizeGuardChecks(
     "ubsan-guard-checks", llvm::cl::Optional,
     llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`."));
 
+//===--------------------------------------------------------------------===//
+//                        Defines for metadata
+//===--------------------------------------------------------------------===//
+enum VariableTypeDescriptorKind : uint16_t {
+  /// An integer type.
+  TK_Integer = 0x0000,
+  /// A floating-point type.
+  TK_Float = 0x0001,
+  /// An _BitInt(N) type.
+  TK_BitInt = 0x0002,
+  /// Any other type. The value representation is unspecified.
+  TK_Unknown = 0xffff
+};
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3292,22 +3307,39 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
 ///   { i16 TypeKind, i16 TypeInfo }
 /// \endcode
 ///
-/// followed by an array of i8 containing the type name. TypeKind is 0 for an
-/// integer, 1 for a floating point value, and -1 for anything else.
+/// followed by an array of i8 containing the type name with extra information
+/// for BitInt. TypeKind is 0 for an integer, 1 for a floating point value, 2
+/// for BitInt and -1 for anything else.
 llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   // Only emit each type's descriptor once.
   if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T))
     return C;
 
-  uint16_t TypeKind = -1;
+  uint16_t TypeKind = TK_Unknown;
   uint16_t TypeInfo = 0;
+  bool IsBitInt = false;
 
   if (T->isIntegerType()) {
-    TypeKind = 0;
+    TypeKind = TK_Integer;
     TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) |
                (T->isSignedIntegerType() ? 1 : 0);
+    // Follow suggestion from https://github.com/llvm/llvm-project/issues/64100
+    // So we can write the exact amount of bits in TypeName after '\0'
+    // making it <diagnostic-like type name>.'\0'.<32-bit width>.
+    if (T->isSignedIntegerType() && T->getAs<BitIntType>()) {
+      // Do a sanity checks as we are using 32-bit type to store bit length.
+      assert((getContext().getTypeSize(T) > 0) &&
+             " non positive amount of bits in __BitInt type");
+      assert((getContext().getTypeSize(T) <= 0xFFFFFFFF) &&
+             " too many bits in __BitInt type");
+
+      // Redefine TypeKind with the actual __BitInt type if we have signed
+      // BitInt.
+      TypeKind = TK_BitInt;
+      IsBitInt = true;
+    }
   } else if (T->isFloatingType()) {
-    TypeKind = 1;
+    TypeKind = TK_Float;
     TypeInfo = getContext().getTypeSize(T);
   }
 
@@ -3318,6 +3350,20 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
       DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(),
       StringRef(), std::nullopt, Buffer, std::nullopt);
 
+  if (IsBitInt) {
+    // The Structure is: 0 to end the string, 32 bit insigned integer in target
+    // endianness, zero.
+    char s[6] = {'\0', '\0', '\0', '\0', '\0', '\0'};
+    const auto *EIT = T->getAs<BitIntType>();
+    uint32_t Bits = EIT->getNumBits();
+    llvm::support::endian::write32(s + 1, Bits,
+                                   getTarget().isBigEndian()
+                                       ? llvm::endianness::big
+                                       : llvm::endianness::little);
+    StringRef str = StringRef(s, 6);
+    Buffer.append(str);
+  }
+
   llvm::Constant *Components[] = {
     Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo),
     llvm::ConstantDataArray::getString(getLLVMContext(), Buffer)
diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp
index dc61e5b939d95..6e88ebaf34d4b 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -67,18 +67,21 @@ const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
 
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
+  // Val was zero-extended to ValueHandle. Sign-extend from original width
+  // to SIntMax.
+  const unsigned ExtraBits =
+      sizeof(SIntMax) * 8 - getType().getIntegerBitCount();
   if (isInlineInt()) {
-    // Val was zero-extended to ValueHandle. Sign-extend from original width
-    // to SIntMax.
-    const unsigned ExtraBits =
-      sizeof(SIntMax) * 8 - getType().getIntegerBitWidth();
     return SIntMax(UIntMax(Val) << ExtraBits) >> ExtraBits;
   }
-  if (getType().getIntegerBitWidth() == 64)
-    return *reinterpret_cast<s64*>(Val);
+  if (getType().getIntegerBitWidth() == 64) {
+    return SIntMax(UIntMax(*reinterpret_cast<s64 *>(Val)) << ExtraBits) >>
+           ExtraBits;
+  }
 #if HAVE_INT128_T
   if (getType().getIntegerBitWidth() == 128)
-    return *reinterpret_cast<s128*>(Val);
+    return SIntMax(UIntMax(*reinterpret_cast<s128 *>(Val)) << ExtraBits) >>
+           ExtraBits;
 #else
   if (getType().getIntegerBitWidth() == 128)
     UNREACHABLE("libclang_rt.ubsan was built without __int128 support");
diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
index e0957276dd241..acef362718204 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.h
+++ b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -103,6 +103,14 @@ class TypeDescriptor {
     /// representation is that of bitcasting the floating-point value to an
     /// integer type.
     TK_Float = 0x0001,
+    /// An _BitInt(N) type. Lowest bit is 1 for a signed value, 0 for an
+    /// unsigned
+    /// value. Remaining bits are log_2(bit_width). The value representation is
+    /// the integer itself if it fits into a ValueHandle, and a pointer to the
+    /// integer otherwise. TypeName contains the true width of the type for the
+    /// signed _BitInt(N) type stored after zero bit after TypeName as 32-bit
+    //  unsigned integer.
+    TK_BitInt = 0x0002,
     /// Any other type. The value representation is unspecified.
     TK_Unknown = 0xffff
   };
@@ -113,10 +121,15 @@ class TypeDescriptor {
     return static_cast<Kind>(TypeKind);
   }
 
-  bool isIntegerTy() const { return getKind() == TK_Integer; }
+  bool isIntegerTy() const {
+    return getKind() == TK_Integer || getKind() == TK_BitInt;
+  }
+  bool isBitIntTy() const { return getKind() == TK_BitInt; }
+
   bool isSignedIntegerTy() const {
     return isIntegerTy() && (TypeInfo & 1);
   }
+  bool isSignedBitIntTy() const { return isBitIntTy() && (TypeInfo & 1); }
   bool isUnsignedIntegerTy() const {
     return isIntegerTy() && !(TypeInfo & 1);
   }
@@ -125,6 +138,26 @@ class TypeDescriptor {
     return 1 << (TypeInfo >> 1);
   }
 
+  const char *getBitIntBitCountPointer() const {
+    CHECK(isBitIntTy());
+    CHECK(isSignedBitIntTy());
+    // Scan Name for zero and return the next address
+    const char *p = getTypeName();
+    while (*p != '\0') {
+      p++;
+    }
+    // Return the next address
+    return p + 1;
+  }
+
+  unsigned getIntegerBitCount() const {
+    CHECK(isIntegerTy());
+    if (isSignedBitIntTy())
+      return *reinterpret_cast<const u32 *>(getBitIntBitCountPointer());
+    else
+      return getIntegerBitWidth();
+  }
+
   bool isFloatTy() const { return getKind() == TK_Float; }
   unsigned getFloatBitWidth() const {
     CHECK(isFloatTy());
diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
new file mode 100644
index 0000000000000..9ec096a0cf20d
--- /dev/null
+++ b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
@@ -0,0 +1,188 @@
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 -fsanitize=alignment,array-bounds,bool,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,nonnull-attribute,null,nullability-arg,nullability-assign,nullability-return,pointer-overflow,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s --check-prefix=CHECK-R
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-IR
+
+#include <stdint.h>
+#include <stdio.h>
+
+uint32_t float_divide_by_zero() {
+  float f = 1.0f / 0.0f;
+  // CHECK-IR: constant { i16, i16, [8 x i8] } { i16 1, i16 32, [8 x i8] c"'float'\00" }
+  _BitInt(37) r = (_BitInt(37))f;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:19: runtime error: inf is outside the range of representable values of type
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(37)'\00%\00\00\00\00\00" }
+  return r;
+}
+
+uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) {
+  _BitInt(37) x = 1 / 0;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:21: runtime error: division by zero
+  // CHECK-IR: constant { i16, i16, [32 x i8] } { i16 0, i16 10, [32 x i8] c"'uint32_t' (aka 'unsigned int')\00" }
+  return x;
+}
+
+uint32_t implicit_unsigned_integer_truncation() {
+  unsigned _BitInt(37) x = 0U;
+  x += float_divide_by_zero();
+  x += integer_divide_by_zero();
+  x += ~0ULL;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:5: runtime error: unsigned integer overflow:
+  // CHECK-IR: constant { i16, i16, [23 x i8] } { i16 0, i16 12, [23 x i8] c"'unsigned _BitInt(37)'\00" }
+  uint32_t r = x;
+  return r;
+}
+
+uint32_t pointer_overflow() __attribute__((no_sanitize("address"))) {
+  _BitInt(37) *x = (_BitInt(37) *)1;
+  _BitInt(37) *y = x - 1;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:22: runtime error: pointer index expression with base
+  uint32_t r = *(_BitInt(37) *)&y;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:16: runtime error: implicit conversion from type
+  return r;
+}
+
+uint32_t vla_bound(_BitInt(37) x) {
+  _BitInt(37) a[x - 1];
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:17: runtime error: variable length array bound evaluates to non-positive value
+  return 0;
+}
+
+uint32_t nullability_arg(_BitInt(37) *_Nonnull x)
+    __attribute__((no_sanitize("address"))) {
+  _BitInt(37) y = *(_BitInt(37) *)&x;
+  return y;
+}
+
+uint32_t unsigned_shift_base() {
+  unsigned _BitInt(37) x = ~0U << 1;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:32: runtime error: left shift of 4294967295 by 1 places cannot be represented in type
+  return x;
+}
+
+uint32_t array_bounds() {
+  _BitInt(37) x[4];
+  _BitInt(37) y = x[10];
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:19: runtime error: index 10 out of bounds for type
+  // CHECK-IR: constant { i16, i16, [17 x i8] } { i16 -1, i16 0, [17 x i8] c"'_BitInt(37)[4]'\00" }
+  return (uint32_t)y;
+}
+
+uint32_t float_cast_overflow() {
+  float a = 100000000.0f;
+  _BitInt(7) b = (_BitInt(7))a;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:18: runtime error: 1e+08 is outside the range of representable values of type
+  // CHECK-IR: constant { i16, i16, [19 x i8] } { i16 2, i16 7, [19 x i8] c"'_BitInt(7)'\00\07\00\00\00\00\00" }
+  return b;
+}
+
+uint32_t implicit_integer_sign_change(unsigned _BitInt(37) x) {
+  _BitInt(37) r = x;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:19: runtime error: implicit conversion from type '{{[^']+}}' of value
+  return r & 0xFFFFFFFF;
+}
+
+_BitInt(13) implicit_signed_integer_truncation() {
+  _BitInt(73) x = (_BitInt(73)) ~((~0UL) >> 1);
+  return x;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:10: runtime error: implicit conversion from type
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(73)'\00I\00\00\00\00\00" }
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 9, [20 x i8] c"'_BitInt(13)'\00\0D\00\00\00\00\00" }
+}
+
+_BitInt(37) nonnull_attribute(__attribute__((nonnull)) _BitInt(37) * x)
+    __attribute__((no_sanitize("address"))) {
+  return *(_BitInt(37) *)&x;
+}
+
+uint32_t nullability_assign(_BitInt(37) * x)
+    __attribute__((no_sanitize("address"))) {
+  _BitInt(37) *_Nonnull y = x;
+  uint32_t r = *(_BitInt(37) *)&y;
+  return r;
+}
+
+_BitInt(37) shift_exponent() {
+  _BitInt(37) x = 1 << (-1);
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:21: runtime error: shift exponent -1 is negative
+  return x;
+}
+
+_BitInt(37) shift_base() {
+  _BitInt(37) x = (-1) << 1;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:24: runtime error: left shift of negative value -1
+  return x;
+}
+
+uint32_t negative_shift1(unsigned _BitInt(37) x) {
+  _BitInt(9) c = -2;
+  return x >> c;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // CHECK-IR: constant { i16, i16, [19 x i8] } { i16 2, i16 9, [19 x i8] c"'_BitInt(9)'\00\09\00\00\00\00\00" }
+}
+
+uint32_t negative_shift2(unsigned _BitInt(37) x) {
+  _BitInt(17) c = -2;
+  return x >> c;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 11, [20 x i8] c"'_BitInt(17)'\00\11\00\00\00\00\00" }
+}
+
+uint32_t negative_shift3(unsigned _BitInt(37) x) {
+  _BitInt(34) c = -2;
+  return x >> c;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(34)'\00\22\00\00\00\00\00" }
+}
+
+uint32_t negative_shift4(unsigned _BitInt(37) x) {
+  int64_t c = -2;
+  return x >> c;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+}
+
+uint32_t negative_shift5(unsigned _BitInt(37) x) {
+  _BitInt(68) c = -2;
+  return x >> c;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:12: runtime error: shift exponent -2 is negative
+  // CHECK-IR: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(68)'\00D\00\00\00\00\00" }
+}
+
+uint32_t unsigned_integer_overflow() {
+  unsigned _BitInt(37) x = ~0U;
+  x++;
+  return x;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:10: runtime error: implicit conversion from type
+}
+
+uint32_t signed_integer_overflow() {
+  _BitInt(37) x = (_BitInt(37)) ~((~0U) >> 1);
+  x--;
+  return x;
+}
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      implicit_unsigned_integer_truncation() +
+      pointer_overflow() +
+      vla_bound(argc) +
+      nullability_arg((_BitInt(37) *)argc) +
+      unsigned_shift_base() +
+      (uint32_t)array_bounds() +
+      float_cast_overflow() +
+      implicit_integer_sign_change((unsigned _BitInt(37))(argc - 2)) +
+      (uint64_t)implicit_signed_integer_truncation() +
+      ((uint64_t)nonnull_attribute((_BitInt(37) *)argc) & 0xFFFFFFFF) +
+      nullability_assign((_BitInt(37) *)argc) +
+      shift_exponent() +
+      (uint32_t)shift_base() +
+      negative_shift1(5) +
+      negative_shift2(5) +
+      negative_shift3(5) +
+      negative_shift4(5) +
+      negative_shift5(5) +
+      unsigned_integer_overflow() +
+      signed_integer_overflow();
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}

>From c91ba013af2faf9fefaa20ed3bbed2413badd8e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?E=C3=A4nolituri=20L=C3=B3mitaur=C3=AB?=
 <eanoliture at gmail.com>
Date: Fri, 24 May 2024 11:39:35 -0400
Subject: [PATCH 2/2] [ubsan] Display correct runtime messages for negative
 _BitInt

Without this patch compiler-rt ubsan library has a bug displaying
incorrect values for variables of the _BitInt (previousely called
_ExtInt) type. This patch affects affects both: generation of metadata
inside code generator and runtime part. The runtime part provided only
for i386 and x86_64 runtimes. Other runtimes should be updated to take
full benefit of this patch.
The patch is constructed the way to be backward compatible and
int and float type runtime diagnostics should be unaffected for not
yet apdated runtimes.
---
 compiler-rt/test/ubsan/TestCases/Integer/bit-int.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
index 9ec096a0cf20d..aee8d9b2f2964 100644
--- a/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
+++ b/compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
@@ -21,13 +21,13 @@ uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) {
 }
 
 uint32_t implicit_unsigned_integer_truncation() {
-  unsigned _BitInt(37) x = 0U;
+  unsigned _BitInt(37) x = 2U;
   x += float_divide_by_zero();
   x += integer_divide_by_zero();
-  x += ~0ULL;
-  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:5: runtime error: unsigned integer overflow:
+  x = x + 0xFFFFFFFFFFFFFFFFULL;
+  // CHECK-R: {{.*}}bit-int.c:[[@LINE-1]]:9: runtime error: unsigned integer overflow:
   // CHECK-IR: constant { i16, i16, [23 x i8] } { i16 0, i16 12, [23 x i8] c"'unsigned _BitInt(37)'\00" }
-  uint32_t r = x;
+  uint32_t r = x & 0xFFFFFFFF;
   return r;
 }