[libc-commits] [libc] [libc] avoid type-pruning with inactive union member (PR #116685)

via libc-commits libc-commits at lists.llvm.org
Mon Nov 18 11:21:20 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Schrodinger ZHU Yifan (SchrodingerZhu)

<details>
<summary>Changes</summary>

According to https://stackoverflow.com/questions/11373203/accessing-inactive-union-member-and-undefined-behavior

> In a union, at most one of the non-static data members can be active at any time, that is, the value of at most one of the non-static data members can be stored in a union at any time.

> although we can legitimately form an lvalue to a non-active union member (which is why assigning to a non-active member without construction is ok) it is considered to be uninitialized.

ISO C++ does not permit type pruning by accessing inactive union member. 

---
Full diff: https://github.com/llvm/llvm-project/pull/116685.diff


5 Files Affected:

- (modified) libc/fuzzing/__support/hashtable_fuzz.cpp (+8-8) 
- (modified) libc/src/__support/HashTable/generic/bitmask_impl.inc (+5-6) 
- (modified) libc/src/__support/hash.h (+9-11) 
- (modified) libc/test/src/__support/HashTable/group_test.cpp (+7-8) 
- (modified) libc/test/src/__support/HashTable/table_test.cpp (+1-1) 


``````````diff
diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp
index 7d61e106c9c4a3..8ab5e3b55cfd4b 100644
--- a/libc/fuzzing/__support/hashtable_fuzz.cpp
+++ b/libc/fuzzing/__support/hashtable_fuzz.cpp
@@ -10,6 +10,7 @@
 ///
 //===----------------------------------------------------------------------===//
 #include "include/llvm-libc-types/ENTRY.h"
+#include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/HashTable/table.h"
 #include "src/__support/macros/config.h"
@@ -81,15 +82,14 @@ static struct {
 
   template <typename T> T next() {
     static_assert(cpp::is_integral<T>::value, "T must be an integral type");
-    union {
-      T result;
-      char data[sizeof(T)];
-    };
-    for (size_t i = 0; i < sizeof(result); i++)
+
+    char data[sizeof(T)];
+
+    for (size_t i = 0; i < sizeof(T); i++)
       data[i] = buffer[i];
-    buffer += sizeof(result);
-    remaining -= sizeof(result);
-    return result;
+    buffer += sizeof(T);
+    remaining -= sizeof(T);
+    return cpp::bit_cast<T>(data);
   }
 
   cpp::string_view next_string() {
diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc
index 469ddeeed8a859..d526dc1ece293d 100644
--- a/libc/src/__support/HashTable/generic/bitmask_impl.inc
+++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
 #include "src/__support/endian_internal.h"
 #include "src/__support/macros/config.h"
@@ -44,13 +45,11 @@ struct Group {
 
   // Load a group of control words from an arbitary address.
   LIBC_INLINE static Group load(const void *addr) {
-    union {
-      bitmask_t value;
-      char bytes[sizeof(bitmask_t)];
-    } data;
+    char bytes[sizeof(bitmask_t)];
+
     for (size_t i = 0; i < sizeof(bitmask_t); ++i)
-      data.bytes[i] = static_cast<const char *>(addr)[i];
-    return {data.value};
+      bytes[i] = static_cast<const char *>(addr)[i];
+    return Group{cpp::bit_cast<bitmask_t>(bytes)};
   }
 
   // Load a group of control words from an aligned address.
diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h
index 527c83993fd59a..49138b1f43b9ed 100644
--- a/libc/src/__support/hash.h
+++ b/libc/src/__support/hash.h
@@ -13,8 +13,8 @@
 #include "src/__support/CPP/limits.h"        // numeric_limits
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"
-#include "src/__support/uint128.h"           // UInt128
-#include <stdint.h>                          // For uint64_t
+#include "src/__support/uint128.h" // UInt128
+#include <stdint.h>                // For uint64_t
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
@@ -34,25 +34,23 @@ LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
 // Therefore, we use a union to read the value.
 template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
   const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
-  union {
-    T value;
-    uint8_t buffer[sizeof(T)];
-  } data;
+  uint8_t buffer[sizeof(T)];
 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
-  // Compiler should able to optimize this as a load followed by a byte swap.
-  // On aarch64 (-mbig-endian), this compiles to the following for int:
+  // Compiler should able to optimize this as a load followed by a byte
+  // swap. On aarch64 (-mbig-endian), this compiles to the following for
+  // int:
   //      ldr     w0, [x0]
   //      rev     w0, w0
   //      ret
   for (size_t i = 0; i < sizeof(T); ++i) {
-    data.buffer[i] = bytes[sizeof(T) - i - 1];
+    buffer[i] = bytes[sizeof(T) - i - 1];
   }
 #else
   for (size_t i = 0; i < sizeof(T); ++i) {
-    data.buffer[i] = bytes[i];
+    buffer[i] = bytes[i];
   }
 #endif
-  return data.value;
+  return cpp::bit_cast<T>(buffer);
 }
 
 // Specialized read functions for small values. size must be <= 8.
diff --git a/libc/test/src/__support/HashTable/group_test.cpp b/libc/test/src/__support/HashTable/group_test.cpp
index 25b15312ad6680..acdc58e205852a 100644
--- a/libc/test/src/__support/HashTable/group_test.cpp
+++ b/libc/test/src/__support/HashTable/group_test.cpp
@@ -8,6 +8,7 @@
 
 #include "src/__support/HashTable/bitmask.h"
 
+#include "src/__support/CPP/bit.h"
 #include "src/__support/macros/config.h"
 #include "src/stdlib/rand.h"
 #include "test/UnitTest/Test.h"
@@ -28,14 +29,13 @@ TEST(LlvmLibcHashTableBitMaskTest, Match) {
   size_t appearance[4][sizeof(Group)];
   ByteArray array{};
 
-  union {
-    uintptr_t random;
-    int data[sizeof(uintptr_t) / sizeof(int)];
-  };
+  int data[sizeof(uintptr_t) / sizeof(int)];
 
   for (int &i : data)
     i = rand();
 
+  uintptr_t random = cpp::bit_cast<uintptr_t>(data);
+
   for (size_t i = 0; i < sizeof(Group); ++i) {
     size_t choice = random % 4;
     random /= 4;
@@ -62,14 +62,13 @@ TEST(LlvmLibcHashTableBitMaskTest, MaskAvailable) {
   for (size_t i = 0; i < sizeof(Group); ++i) {
     ByteArray array{};
 
-    union {
-      uintptr_t random;
-      int data[sizeof(uintptr_t) / sizeof(int)];
-    };
+    int data[sizeof(uintptr_t) / sizeof(int)];
 
     for (int &j : data)
       j = rand();
 
+    uintptr_t random = cpp::bit_cast<uintptr_t>(data);
+
     ASSERT_FALSE(Group::load(array.data).mask_available().any_bit_set());
 
     array.data[i] = 0x80;
diff --git a/libc/test/src/__support/HashTable/table_test.cpp b/libc/test/src/__support/HashTable/table_test.cpp
index f8ffa4d4123d35..c3b8697f2087a1 100644
--- a/libc/test/src/__support/HashTable/table_test.cpp
+++ b/libc/test/src/__support/HashTable/table_test.cpp
@@ -82,7 +82,7 @@ TEST(LlvmLibcTableTest, GrowthSequence) {
 }
 
 TEST(LlvmLibcTableTest, Insertion) {
-  union key {
+  struct key {
     char bytes[2];
   } keys[256];
   for (size_t k = 0; k < 256; ++k) {

``````````

</details>


https://github.com/llvm/llvm-project/pull/116685


More information about the libc-commits mailing list