[clang] [WIP][libc++] Add builtin to clear padding bytes (prework for P0528R3) (PR #75371)

via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 13 11:41:00 PST 2023


https://github.com/huixie90 created https://github.com/llvm/llvm-project/pull/75371

None

>From b7b97148c54dda550fcfb024236c32a6bdca16fd Mon Sep 17 00:00:00 2001
From: zoecarver <z.zoelec2 at gmail.com>
Date: Sat, 2 Dec 2023 20:00:30 +0000
Subject: [PATCH 1/2] [Builtin] Add __builtin_zero_non_value_bits.

Adds `__builtin_zero_non_value_bits` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible).

I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans in follow-up patches.

Differential Revision: https://reviews.llvm.org/D87974
---
 clang/include/clang/Basic/Builtins.def        |   1 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  96 +++++++
 clang/lib/Sema/SemaChecking.cpp               |  20 ++
 .../builtin-zero-non-value-bits-codegen.cpp   | 112 ++++++++
 .../builtin-zero-non-value-bits.cpp           | 249 ++++++++++++++++++
 .../SemaCXX/builtin-zero-non-value-bits.cpp   |  15 ++
 6 files changed, 493 insertions(+)
 create mode 100644 clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
 create mode 100644 clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
 create mode 100644 clang/test/SemaCXX/builtin-zero-non-value-bits.cpp

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ec39e926889b93..ba944a6f04fdc0 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -633,6 +633,7 @@ BUILTIN(__builtin_vsscanf, "icC*RcC*Ra", "FS:1:")
 BUILTIN(__builtin_thread_pointer, "v*", "nc")
 BUILTIN(__builtin_launder, "v*v*", "ntE")
 LANGBUILTIN(__builtin_is_constant_evaluated, "b", "nE", CXX_LANG)
+LANGBUILTIN(__builtin_zero_non_value_bits, "v.", "n", CXX_LANG)
 
 // GCC exception builtins
 BUILTIN(__builtin_eh_return, "vzv*", "r") // FIXME: Takes intptr_t, not size_t!
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061d..34b272f9bddbc9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2456,6 +2456,95 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
   return RValue::get(CGF->Builder.CreateCall(UBF, Args));
 }
 
+static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
+                                        QualType Ty) { // Emits byte stores that zero the padding of the record at Ptr (type Ty).
+  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+  auto WriteZeroAtOffset = [&](size_t Offset) { // Emits a one-byte zero store at I8Ptr + Offset.
+    auto Index = ConstantInt::get(CGF.IntTy, Offset);
+    auto Element = CGF.Builder.CreateGEP(I8Ptr, Index);
+    CGF.Builder.CreateAlignedStore(
+        Zero, Element,
+        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+  };
+  auto GetStructLayout = [&CGF](llvm::Type *Ty) { // Looks up the module DataLayout's layout for a struct type.
+    auto ST = cast<StructType>(Ty);
+    return CGF.CGM.getModule().getDataLayout().getStructLayout(ST);
+  };
+
+  auto ST = cast<StructType>(Ptr->getType()->getPointerElementType());
+  auto SL = GetStructLayout(ST);
+  auto R = cast<CXXRecordDecl>(Ty->getAsRecordDecl());
+  const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+  size_t RunningOffset = 0; // Byte offset just past the last base/field handled so far.
+  for (auto Base : R->bases()) {
+    // Zero the bytes between the previous element and this base subobject.
+    auto BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
+    auto Offset = static_cast<size_t>(
+        ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
+    for (; RunningOffset < Offset; ++RunningOffset) {
+      WriteZeroAtOffset(RunningOffset);
+    }
+    // Recurse so that padding inside the base subobject is zeroed as well.
+    auto Index = SL->getElementContainingOffset(Offset);
+    auto BaseElement = CGF.Builder.CreateStructGEP(Ptr, Index);
+    RecursivelyZeroNonValueBits(CGF, BaseElement, Base.getType());
+    // Advance past the base using its LLVM struct layout size, so packed types are picked up.
+    auto SL = GetStructLayout(ST->getElementType(Index));
+    auto Size = SL->getSizeInBytes();
+    RunningOffset = Offset + Size;
+  }
+
+  size_t NumFields = std::distance(R->field_begin(), R->field_end());
+  auto CurrentField = R->field_begin();
+  for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
+    // The field offset is divided by 8 to get bytes, the unit RunningOffset uses.
+    auto Offset = ASTLayout.getFieldOffset(I) / 8;
+    for (; RunningOffset < Offset; ++RunningOffset) {
+      WriteZeroAtOffset(RunningOffset);
+    }
+
+    auto Index = SL->getElementContainingOffset(Offset);
+    // A record-typed field can carry padding of its own, so recurse into it.
+    if (CurrentField->getType()->isRecordType()) {
+      auto Element = CGF.Builder.CreateStructGEP(Ptr, Index);
+      RecursivelyZeroNonValueBits(CGF, Element, CurrentField->getType());
+    }
+
+    // TODO: warn if non-constant array type; only constant arrays of records are visited here.
+    if (isa<ConstantArrayType>(CurrentField->getType()) &&
+        CurrentField->getType()
+            ->getArrayElementTypeNoTypeQual()
+            ->isRecordType()) {
+      auto FieldElement = CGF.Builder.CreateStructGEP(Ptr, Index);
+      auto AT = cast<ConstantArrayType>(CurrentField->getType());
+      for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+           ++ArrIndex) {
+        auto ElementRecord = AT->getElementType()->getAsRecordDecl();
+        auto ElementAlign =
+            CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment();
+        Address FieldElementAddr{FieldElement, ElementAlign};
+        auto Element =
+            CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
+        RecursivelyZeroNonValueBits(CGF, Element.getPointer(),
+                                    AT->getElementType());
+      }
+    }
+
+    auto Size = CGF.CGM.getModule()
+                    .getDataLayout()
+                    .getTypeSizeInBits(ST->getElementType(Index))
+                    .getKnownMinSize() /
+                8;
+    RunningOffset = Offset + Size; // Skip over the field's own storage.
+  }
+  // Zero the tail: every byte from the end of the last field up to the struct size.
+  auto Size = SL->getSizeInBytes();
+  for (; RunningOffset < Size; ++RunningOffset) {
+    WriteZeroAtOffset(RunningOffset);
+  }
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -4315,6 +4404,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(Ptr);
   }
+  case Builtin::BI__builtin_zero_non_value_bits: {
+    const Expr *Op = E->getArg(0);
+    Value *Address = EmitScalarExpr(Op); // Pointer value of the argument.
+    auto PointeeTy = Op->getType()->getPointeeType(); // Type of the object the argument points to.
+    RecursivelyZeroNonValueBits(*this, Address, PointeeTy); // Emit the zeroing stores.
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_sub:
   case Builtin::BI__sync_fetch_and_or:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 77c8334f3ca25d..2fec83885c9496 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2327,6 +2327,26 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_launder:
     return SemaBuiltinLaunder(*this, TheCall);
+  case Builtin::BI__builtin_zero_non_value_bits: { // Semantic checks on the single pointer argument.
+    const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts();
+    const QualType PtrArgType = PtrArg->getType();
+    if (!PtrArgType->isPointerType() ||
+        !PtrArgType->getPointeeType()->isRecordType()) { // Must be a pointer to a record type.
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+          << PtrArgType << "structure pointer" << 1 << 0 << 3 << 1 << PtrArgType
+          << "structure pointer";
+      return ExprError();
+    }
+    if (PtrArgType->getPointeeType().isConstQualified()) { // Pointee must not be const-qualified.
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+          << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+      return ExprError();
+    }
+    if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+                            diag::err_typecheck_decl_incomplete_type))
+      return ExprError(); // Pointee type must be complete.
+    break;
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
new file mode 100644
index 00000000000000..8ad1e148ded861
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+struct alignas(4) Foo {
+  char a;
+  alignas(2) char b;
+};
+
+struct alignas(4) Bar {
+  char c;
+  alignas(2) char d;
+};
+
+struct alignas(4) Baz : Foo {
+  char e;
+  Bar f;
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
+// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
+// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
+
+// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
+// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_5]]
+
+// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
+// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
+// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_6]]
+// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_7]]
+// CHECK: ret void
+void testBaz(Baz *baz) {
+  __builtin_zero_non_value_bits(baz);
+}
+
+struct UnsizedTail {
+  int size;
+  alignas(8) char buf[];
+
+  UnsizedTail(int size) : size(size) {}
+};
+
+// UnsizedTail structure:
+// "size", PAD_1, PAD_2, PAD_3, PAD_4
+// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
+
+// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
+// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
+// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testUnsizedTail(UnsizedTail *u) {
+  __builtin_zero_non_value_bits(u);
+}
+
+struct ArrOfStructsWithPadding {
+  Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
+// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
+// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
+// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
+// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
+  __builtin_zero_non_value_bits(arr);
+}
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
new file mode 100644
index 00000000000000..ed6e409178bc5c
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
@@ -0,0 +1,249 @@
+// RUN: mkdir -p %t
+// RUN: %clang++ %s -o %t/run
+// RUN: %t/run
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) BasicWithPadding {
+  T x;
+  alignas(A2) T y;
+};
+
+template <size_t A1, size_t A2, size_t N, class T>
+struct alignas(A1) SpacedArrayMembers {
+  T x[N];
+  alignas(A2) char c;
+  T y[N];
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) PaddedPointerMembers {
+  T *x;
+  alignas(A2) T *y;
+};
+
+template <size_t A1, size_t A2, size_t A3, class T>
+struct alignas(A1) ThreeMembers {
+  T x;
+  alignas(A2) T y;
+  alignas(A3) T z;
+};
+
+template <class T>
+struct Normal {
+  T a;
+  T b;
+};
+
+template <class T>
+struct X {
+  T x;
+};
+
+template <class T>
+struct Z {
+  T z;
+};
+
+template <size_t A, class T>
+struct YZ : public Z<T> {
+  alignas(A) T y;
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) HasBase : public X<T>, public YZ<A2, T> {
+  T a;
+  alignas(A2) T b;
+};
+
+template <size_t A1, size_t A2, class T>
+void testAllForType(T a, T b, T c, T d) {
+  using B = BasicWithPadding<A1, A2, T>;
+  B basic1;
+  memset(&basic1, 0, sizeof(B));
+  basic1.x = a;
+  basic1.y = b;
+  B basic2;
+  memset(&basic2, 42, sizeof(B));
+  basic2.x = a;
+  basic2.y = b;
+  assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+  __builtin_zero_non_value_bits(&basic2);
+  assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+
+  using A = SpacedArrayMembers<A1, A2, 2, T>;
+  A arr1;
+  memset(&arr1, 0, sizeof(A));
+  arr1.x[0] = a;
+  arr1.x[1] = b;
+  arr1.y[0] = c;
+  arr1.y[1] = d;
+  A arr2;
+  memset(&arr2, 42, sizeof(A));
+  arr2.x[0] = a;
+  arr2.x[1] = b;
+  arr2.y[0] = c;
+  arr2.y[1] = d;
+  arr2.c = 0;
+  assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+  __builtin_zero_non_value_bits(&arr2);
+  assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+
+  using P = PaddedPointerMembers<A1, A2, T>;
+  P ptr1;
+  memset(&ptr1, 0, sizeof(P));
+  ptr1.x = &a;
+  ptr1.y = &b;
+  P ptr2;
+  memset(&ptr2, 42, sizeof(P));
+  ptr2.x = &a;
+  ptr2.y = &b;
+  assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+  __builtin_zero_non_value_bits(&ptr2);
+  assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+
+  using Three = ThreeMembers<A1, A2, A2, T>;
+  Three three1;
+  memset(&three1, 0, sizeof(Three));
+  three1.x = a;
+  three1.y = b;
+  three1.z = c;
+  Three three2;
+  memset(&three2, 42, sizeof(Three));
+  three2.x = a;
+  three2.y = b;
+  three2.z = c;
+  __builtin_zero_non_value_bits(&three2);
+  assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+
+  using N = Normal<T>;
+  N normal1;
+  memset(&normal1, 0, sizeof(N));
+  normal1.a = a;
+  normal1.b = b;
+  N normal2;
+  memset(&normal2, 42, sizeof(N));
+  normal2.a = a;
+  normal2.b = b;
+  __builtin_zero_non_value_bits(&normal2);
+  assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+
+  using H = HasBase<A1, A2, T>;
+  H base1;
+  memset(&base1, 0, sizeof(H));
+  base1.a = a;
+  base1.b = b;
+  base1.x = c;
+  base1.y = d;
+  base1.z = a;
+  H base2;
+  memset(&base2, 42, sizeof(H));
+  base2.a = a;
+  base2.b = b;
+  base2.x = c;
+  base2.y = d;
+  base2.z = a;
+  assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+  __builtin_zero_non_value_bits(&base2);
+  unsigned i = 0;
+  assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+}
+
+struct UnsizedTail {
+  int size;
+  alignas(8) char buf[];
+
+  UnsizedTail(int size) : size(size) {}
+};
+
+void otherTests() {
+  const size_t size1 = sizeof(UnsizedTail) + 4;
+  char buff1[size1];
+  char buff2[size1];
+  memset(buff1, 0, size1);
+  memset(buff2, 42, size1);
+  auto *u1 = new (buff1) UnsizedTail(4);
+  u1->buf[0] = 1;
+  u1->buf[1] = 2;
+  u1->buf[2] = 3;
+  u1->buf[3] = 4;
+  auto *u2 = new (buff2) UnsizedTail(4);
+  u2->buf[0] = 1;
+  u2->buf[1] = 2;
+  u2->buf[2] = 3;
+  u2->buf[3] = 4;
+  assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+  __builtin_zero_non_value_bits(u2);
+  assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+
+  using B = BasicWithPadding<8, 4, char>;
+  auto *basic1 = new B;
+  memset(basic1, 0, sizeof(B));
+  basic1->x = 1;
+  basic1->y = 2;
+  auto *basic2 = new B;
+  memset(basic2, 42, sizeof(B));
+  basic2->x = 1;
+  basic2->y = 2;
+  assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+  __builtin_zero_non_value_bits(basic2);
+  assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+  delete basic2;
+  delete basic1;
+
+  using B = BasicWithPadding<8, 4, char>;
+  B *basic3 = new B;
+  memset(basic3, 0, sizeof(B));
+  basic3->x = 1;
+  basic3->y = 2;
+  B *basic4 = new B;
+  memset(basic4, 42, sizeof(B));
+  basic4->x = 1;
+  basic4->y = 2;
+  assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+  __builtin_zero_non_value_bits(const_cast<volatile B *>(basic4));
+  assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+  delete basic4;
+  delete basic3;
+}
+
+struct Foo {
+  int x;
+  int y;
+};
+
+typedef float Float4Vec __attribute__((ext_vector_type(4)));
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+int main() {
+  testAllForType<32, 16, char>(11, 22, 33, 44);
+  testAllForType<64, 32, char>(4, 5, 6, 7);
+  testAllForType<32, 16, volatile char>(11, 22, 33, 44);
+  testAllForType<64, 32, volatile char>(4, 5, 6, 7);
+  testAllForType<32, 16, int>(0, 1, 2, 3);
+  testAllForType<64, 32, int>(4, 5, 6, 7);
+  testAllForType<32, 16, volatile int>(0, 1, 2, 3);
+  testAllForType<64, 32, volatile int>(4, 5, 6, 7);
+  testAllForType<32, 16, double>(0, 1, 2, 3);
+  testAllForType<64, 32, double>(4, 5, 6, 7);
+  testAllForType<32, 16, _ExtInt(28)>(0, 1, 2, 3);
+  testAllForType<64, 32, _ExtInt(28)>(4, 5, 6, 7);
+  testAllForType<32, 16, _ExtInt(60)>(0, 1, 2, 3);
+  testAllForType<64, 32, _ExtInt(60)>(4, 5, 6, 7);
+  testAllForType<32, 16, _ExtInt(64)>(0, 1, 2, 3);
+  testAllForType<64, 32, _ExtInt(64)>(4, 5, 6, 7);
+  testAllForType<32, 16, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllForType<64, 32, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllForType<256, 128, Float3Vec>(0, 1, 2, 3);
+  testAllForType<128, 128, Float3Vec>(4, 5, 6, 7);
+  testAllForType<256, 128, Float4Vec>(0, 1, 2, 3);
+  testAllForType<128, 128, Float4Vec>(4, 5, 6, 7);
+
+  otherTests();
+
+  return 0;
+}
diff --git a/clang/test/SemaCXX/builtin-zero-non-value-bits.cpp b/clang/test/SemaCXX/builtin-zero-non-value-bits.cpp
new file mode 100644
index 00000000000000..c5b17e8b971e1a
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-zero-non-value-bits.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct Foo {};
+
+struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
+
+void test(int a, Foo b, void *c, int *d, Foo *e, const Foo *f, Incomplete *g) {
+  __builtin_zero_non_value_bits(a); // expected-error {{passing 'int' to parameter of incompatible type structure pointer: type mismatch at 1st parameter ('int' vs structure pointer)}}
+  __builtin_zero_non_value_bits(b); // expected-error {{passing 'Foo' to parameter of incompatible type structure pointer: type mismatch at 1st parameter ('Foo' vs structure pointer)}}
+  __builtin_zero_non_value_bits(c); // expected-error {{passing 'void *' to parameter of incompatible type structure pointer: type mismatch at 1st parameter ('void *' vs structure pointer)}}
+  __builtin_zero_non_value_bits(d); // expected-error {{passing 'int *' to parameter of incompatible type structure pointer: type mismatch at 1st parameter ('int *' vs structure pointer)}}
+  __builtin_zero_non_value_bits(e); // This should not error.
+  __builtin_zero_non_value_bits(f); // expected-error {{read-only variable is not assignable}}
+  __builtin_zero_non_value_bits(g); // expected-error {{variable has incomplete type 'Incomplete'}}
+}

>From ff8a6cad88519dec919451c37bba03aa6ef21324 Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Wed, 13 Dec 2023 19:23:32 +0000
Subject: [PATCH 2/2] overlapping subobjects + opaque pointer

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 163 +++++++++++-------
 clang/lib/Sema/SemaChecking.cpp               |  34 ++--
 .../builtin-zero-non-value-bits.cpp           | 114 +++++++++++-
 3 files changed, 231 insertions(+), 80 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 34b272f9bddbc9..638af25fb7a41e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -63,6 +63,10 @@
 #include <optional>
 #include <sstream>
 
+
+
+#include <iostream>
+
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm;
@@ -2456,43 +2460,28 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
   return RValue::get(CGF->Builder.CreateCall(UBF, Args));
 }
 
-static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
-                                        QualType Ty) {
-  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
-  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
-  auto WriteZeroAtOffset = [&](size_t Offset) {
-    auto Index = ConstantInt::get(CGF.IntTy, Offset);
-    auto Element = CGF.Builder.CreateGEP(I8Ptr, Index);
-    CGF.Builder.CreateAlignedStore(
-        Zero, Element,
-        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
-  };
-  auto GetStructLayout = [&CGF](llvm::Type *Ty) {
-    auto ST = cast<StructType>(Ty);
-    return CGF.CGM.getModule().getDataLayout().getStructLayout(ST);
-  };
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty, size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset);
 
-  auto ST = cast<StructType>(Ptr->getType()->getPointerElementType());
-  auto SL = GetStructLayout(ST);
+template <class T>
+void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty, StructType *ST, 
+                        size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset) {
+  std::cerr << "\n struct! " << ST->getName().data() << std::endl;
+  auto SL = CGF.CGM.getModule().getDataLayout().getStructLayout(ST);
   auto R = cast<CXXRecordDecl>(Ty->getAsRecordDecl());
   const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
-  size_t RunningOffset = 0;
   for (auto Base : R->bases()) {
+    std::cerr << "\n\n base!"  << std::endl;
     // Zero padding between base elements.
     auto BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
     auto Offset = static_cast<size_t>(
         ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
-    for (; RunningOffset < Offset; ++RunningOffset) {
-      WriteZeroAtOffset(RunningOffset);
-    }
     // Recursively zero out base classes.
     auto Index = SL->getElementContainingOffset(Offset);
-    auto BaseElement = CGF.Builder.CreateStructGEP(Ptr, Index);
-    RecursivelyZeroNonValueBits(CGF, BaseElement, Base.getType());
-    // Use the LLVM StructType data layout so we pick up on packed types.
-    auto SL = GetStructLayout(ST->getElementType(Index));
-    auto Size = SL->getSizeInBytes();
-    RunningOffset = Offset + Size;
+    Value *Idx = CGF.Builder.getSize(Index);
+    llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
+    Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
+    RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(), CurrentStartOffset + Offset, RunningOffset, WriteZeroAtOffset);
   }
 
   size_t NumFields = std::distance(R->field_begin(), R->field_end());
@@ -2500,46 +2489,98 @@ static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
   for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
     // Size needs to be in bytes so we can compare it later.
     auto Offset = ASTLayout.getFieldOffset(I) / 8;
-    for (; RunningOffset < Offset; ++RunningOffset) {
-      WriteZeroAtOffset(RunningOffset);
-    }
-
     auto Index = SL->getElementContainingOffset(Offset);
-    // If this field is an object, it may have non-zero padding.
-    if (CurrentField->getType()->isRecordType()) {
-      auto Element = CGF.Builder.CreateStructGEP(Ptr, Index);
-      RecursivelyZeroNonValueBits(CGF, Element, CurrentField->getType());
-    }
+    Value *Idx = CGF.Builder.getSize(Index);
+    llvm::Type *CurrentFieldType = CGF.ConvertTypeForMem(CurrentField->getType());
+    Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
+    RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(), CurrentStartOffset + Offset, RunningOffset, WriteZeroAtOffset);
+  }
+}
 
-    // TODO: warn if non-constant array type.
-    if (isa<ConstantArrayType>(CurrentField->getType()) &&
-        CurrentField->getType()
-            ->getArrayElementTypeNoTypeQual()
-            ->isRecordType()) {
-      auto FieldElement = CGF.Builder.CreateStructGEP(Ptr, Index);
-      auto AT = cast<ConstantArrayType>(CurrentField->getType());
-      for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
-           ++ArrIndex) {
-        auto ElementRecord = AT->getElementType()->getAsRecordDecl();
-        auto ElementAlign =
-            CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment();
-        Address FieldElementAddr{FieldElement, ElementAlign};
-        auto Element =
-            CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
-        RecursivelyZeroNonValueBits(CGF, Element.getPointer(),
-                                    AT->getElementType());
-      }
+template <class T>
+void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr, llvm::Type *Type, ConstantArrayType const *AT, 
+                               size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset) {
+  for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+       ++ArrIndex) {
+
+    QualType ElementQualType = AT->getElementType();
+
+    auto *ElementRecord = ElementQualType->getAsRecordDecl();
+    if(!ElementRecord){
+      std::cerr<< "\n\n null!" << std::endl;
     }
+    auto ElementAlign =ElementRecord?
+        CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment():
+        CGF.getContext().getTypeAlignInChars(ElementQualType);
+
+      std::cerr<< "\n\n align: " << ElementAlign.getQuantity() << std::endl;
+    
+    // Value *Idx = CGF.Builder.getSize(0);
+    // Value *ArrayElement = CGF.Builder.CreateGEP(ElementType, Ptr, Idx);
 
-    auto Size = CGF.CGM.getModule()
+    Address FieldElementAddr{Ptr, Type, ElementAlign};
+
+    auto Element =
+        CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
+    auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
+    auto AllocSize = CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
+    std::cerr << "\n\n clearing array index! " << ArrIndex << std::endl;
+    RecursivelyClearPaddingImpl(CGF, Element.getPointer(), ElementQualType, CurrentStartOffset + ArrIndex * AllocSize.getKnownMinValue(), RunningOffset, WriteZeroAtOffset);
+  }
+}
+
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty, size_t CurrentStartOffset, 
+                                 size_t& RunningOffset, T&& WriteZeroAtOffset) {
+
+  std::cerr << "\n\n  zero padding before current  ["<< RunningOffset << ", " << CurrentStartOffset<< ")"<< std::endl;
+  for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
+    WriteZeroAtOffset(RunningOffset);
+  }
+  auto Type = CGF.ConvertTypeForMem(Ty);
+  auto Size = CGF.CGM.getModule()
                     .getDataLayout()
-                    .getTypeSizeInBits(ST->getElementType(Index))
-                    .getKnownMinSize() /
-                8;
-    RunningOffset = Offset + Size;
+                    .getTypeSizeInBits(Type)
+                    .getKnownMinValue() / 8;
+
+  if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
+    ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset, RunningOffset, WriteZeroAtOffset);
+  }
+  else if (auto *ST = dyn_cast<StructType>(Type)) {
+    ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset, WriteZeroAtOffset);
+  } else {
+    std::cerr << "\n\n increment running offset from: " << RunningOffset << " to " << RunningOffset + Size << std::endl;
+    RunningOffset += Size;
   }
-  // Clear all bits after the last field.
-  auto Size = SL->getSizeInBytes();
+
+}
+
+static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
+                                        QualType Ty) {
+  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+  auto WriteZeroAtOffset = [&](size_t Offset) {
+    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+    CGF.Builder.CreateAlignedStore(
+        Zero, Element,
+        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+  };
+
+
+  size_t RunningOffset = 0;
+
+  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset);
+
+  // Clear tail padding
+  auto Type = CGF.ConvertTypeForMem(Ty);
+  
+  auto Size = CGF.CGM.getModule()
+                    .getDataLayout()
+                    .getTypeSizeInBits(Type)
+                    .getKnownMinValue() / 8;
+
+  std::cerr << "\n\n  zero tail padding  ["<< RunningOffset << ", " << Size << ")"<< std::endl;
   for (; RunningOffset < Size; ++RunningOffset) {
     WriteZeroAtOffset(RunningOffset);
   }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2fec83885c9496..3470dfca3ed6c7 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2328,23 +2328,23 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_launder:
     return SemaBuiltinLaunder(*this, TheCall);
   case Builtin::BI__builtin_zero_non_value_bits: {
-    const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts();
-    const QualType PtrArgType = PtrArg->getType();
-    if (!PtrArgType->isPointerType() ||
-        !PtrArgType->getPointeeType()->isRecordType()) {
-      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
-          << PtrArgType << "structure pointer" << 1 << 0 << 3 << 1 << PtrArgType
-          << "structure pointer";
-      return ExprError();
-    }
-    if (PtrArgType->getPointeeType().isConstQualified()) {
-      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
-          << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
-      return ExprError();
-    }
-    if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
-                            diag::err_typecheck_decl_incomplete_type))
-      return ExprError();
+    // const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts();
+    // const QualType PtrArgType = PtrArg->getType();
+    // if (!PtrArgType->isPointerType() ||
+    //     !PtrArgType->getPointeeType()->isRecordType()) {
+    //   Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+    //       << PtrArgType << "structure pointer" << 1 << 0 << 3 << 1 << PtrArgType
+    //       << "structure pointer";
+    //   return ExprError();
+    // }
+    // if (PtrArgType->getPointeeType().isConstQualified()) {
+    //   Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+    //       << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+    //   return ExprError();
+    // }
+    // if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+    //                         diag::err_typecheck_decl_incomplete_type))
+    //   return ExprError();
     break;
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
index ed6e409178bc5c..4373dcbdb720ba 100644
--- a/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
@@ -7,6 +7,21 @@
 #include <cstring>
 #include <new>
 
+template <class T>
+void print_bytes(const T *object)  // Debug helper: writes every byte of *object to stderr as hex.
+{
+  auto size = sizeof(T);  // byte count of the whole object representation
+  const unsigned char * const bytes = reinterpret_cast<const unsigned char *>(object);  // view the object as raw bytes
+  size_t i;
+
+  fprintf(stderr, "[ ");
+  for(i = 0; i < size; i++)
+  {
+    fprintf(stderr, "%02x ", bytes[i]);  // two lowercase hex digits per byte, space separated
+  }
+  fprintf(stderr, "]\n");
+}
+
 template <size_t A1, size_t A2, class T>
 struct alignas(A1) BasicWithPadding {
   T x;
@@ -74,7 +89,6 @@ void testAllForType(T a, T b, T c, T d) {
   assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
   __builtin_zero_non_value_bits(&basic2);
   assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
-
   using A = SpacedArrayMembers<A1, A2, 2, T>;
   A arr1;
   memset(&arr1, 0, sizeof(A));
@@ -219,7 +233,104 @@ struct Foo {
 typedef float Float4Vec __attribute__((ext_vector_type(4)));
 typedef float Float3Vec __attribute__((ext_vector_type(3)));
 
+struct S1 {  // int followed by char, both defaulted to 0; embedded in S2 and S3 via [[no_unique_address]]
+ int x = 0;
+ char c = 0;
+};
+
+struct S2{  // S1 held via [[no_unique_address]] plus bool / long double / bool; referenced here only from the commented-out experiment in main
+  [[no_unique_address]] S1 s1;  // NOTE(review): presumably lets `b` reuse S1's tail padding -- ABI dependent, confirm
+  bool b;
+  long double l;
+  bool b2;
+};
+
+struct S3{  // S1 held via [[no_unique_address]] followed by one bool; used as an array element in the commented-out experiment in main
+  [[no_unique_address]] S1 s1;
+  bool b;
+};
+
+struct alignas(32) S4 {  // single int in a 32-byte-aligned struct; used as an array element in the commented-out experiment in main
+  int i;
+};
+struct B1{  // empty class; first base of D
+
+};
+
+struct B2 {  // base of D contributing one int
+  int x;
+};
+struct B3{  // base of D contributing one char
+  char c;
+};
+
+struct B4{  // base of D contributing one bool
+  bool b;
+};
+
+struct B5{  // base of D contributing a second int
+  int x2;
+};
+
+struct D:B1,B2,B3,B4,B5{  // multiple-inheritance case: five bases (one empty) plus own members; referenced here only from the commented-out experiment in main
+  long double l;
+  bool b2;
+};
+
+
 int main() {
+  /*
+  S2 s2{};
+
+  memset(&s2, 42, sizeof(S2));
+  s2.s1.x = 0x12345678;
+  s2.s1.c = 0xff;
+  s2.b = true;
+  s2.l = 3.333;
+  s2.b2 = true;
+  print_bytes(&s2);
+  __builtin_zero_non_value_bits(&s2);
+  print_bytes(&s2);
+
+  D s2{};
+  memset(&s2, 42, sizeof(D));
+  s2.x = 0x12345678;
+  s2.c = 0xff;
+  s2.b = true;
+  s2.x2 = 0x87654321;
+  s2.l = 3.333;
+  s2.b2 = true;
+  print_bytes(&s2);
+  __builtin_zero_non_value_bits(&s2);
+  print_bytes(&s2);
+
+  S3 s2[2];
+
+  memset(&s2, 42, 2*sizeof(S3));
+  s2[0].s1.x = 0x12345678;
+  s2[0].s1.c = 0xff;
+  s2[0].b = true;
+  s2[1].s1.x = 0x12345678;
+  s2[1].s1.c = 0xff;
+  s2[1].b = true;
+  print_bytes(&s2);
+  __builtin_zero_non_value_bits(&s2);
+  print_bytes(&s2);
+
+  */
+/*
+  S4 s2[2];
+
+  memset(&s2, 42, 2*sizeof(S4));
+  s2[0].i = 0x12345678;
+  s2[1].i = 0x12345678;
+  print_bytes(&s2);
+  __builtin_zero_non_value_bits(&s2);
+  print_bytes(&s2);
+
+
+  assert(false);
+*/
   testAllForType<32, 16, char>(11, 22, 33, 44);
   testAllForType<64, 32, char>(4, 5, 6, 7);
   testAllForType<32, 16, volatile char>(11, 22, 33, 44);
@@ -244,6 +355,5 @@ int main() {
   testAllForType<128, 128, Float4Vec>(4, 5, 6, 7);
 
   otherTests();
-
   return 0;
 }



More information about the cfe-commits mailing list