[libcxx-commits] [clang] [libcxx] [clang] Add builtin to clear padding bytes (prework for P0528R3) (PR #75371)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat May 31 03:38:50 PDT 2025


https://github.com/huixie90 updated https://github.com/llvm/llvm-project/pull/75371

>From 88293bb9854150e6b2fecfea27aa8a7b6634709c Mon Sep 17 00:00:00 2001
From: zoecarver <z.zoelec2 at gmail.com>
Date: Sat, 2 Dec 2023 20:00:30 +0000
Subject: [PATCH 1/5] [Builtin] Add __builtin_clear_padding

Adds `__builtin_clear_padding` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible).

I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans as follow up patches.

Differential Revision: https://reviews.llvm.org/D87974

overlapping subobjects + opague pointer

union, rename, scalar types
---
 clang/include/clang/Basic/Builtins.td         |   5 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 209 +++++
 clang/lib/Sema/SemaChecking.cpp               |  30 +
 .../builtin-clear-padding-codegen.cpp         | 112 +++
 clang/test/SemaCXX/builtin-clear-padding.cpp  |  15 +
 .../atomics/builtin_clear_padding.pass.cpp    | 807 ++++++++++++++++++
 6 files changed, 1178 insertions(+)
 create mode 100644 clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
 create mode 100644 clang/test/SemaCXX/builtin-clear-padding.cpp
 create mode 100644 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index e43b87fb3c131..406cb2ac179ef 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -968,6 +968,11 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_is_within_lifetime"];
   let Attributes = [NoThrow, CustomTypeChecking, Consteval];
   let Prototype = "bool(void*)";
+
+def ClearPadding : LangBuiltin<"CXX_LANG"> {
+  let Spellings = ["__builtin_clear_padding"];
+  let Attributes = [NoThrow];
+  let Prototype = "void(void*)";
 }
 
 // GCC exception builtins
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ca29a6dbe9c9a..ef090b5c70889 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -36,6 +36,9 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/TargetParser/AArch64TargetParser.h"
+#include "llvm/TargetParser/X86TargetParser.h"
+#include <algorithm>
 #include <optional>
 #include <utility>
 
@@ -2554,6 +2557,205 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
   return RValue::get(CGF->Builder.CreateCall(UBF, Args));
 }
 
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
+                                 size_t CurrentStartOffset,
+                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
+                                 bool VisitVirtualBase);
+
+template <class T>
+void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
+                        StructType *ST, size_t CurrentStartOffset,
+                        size_t &RunningOffset, T &&WriteZeroAtOffset,
+                        bool VisitVirtualBase) {
+  llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
+  const auto &DL = CGF.CGM.getModule().getDataLayout();
+  auto *SL = DL.getStructLayout(ST);
+  auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
+  if (!R) {
+    llvm::dbgs() << "Not a CXXRecordDecl\n";
+    return;
+  }
+  const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+  if (ASTLayout.hasOwnVFPtr()) {
+    llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
+                 << RunningOffset << " to "
+                 << RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
+    RunningOffset += DL.getPointerSizeInBits() / 8;
+  }
+  std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
+  Bases.reserve(R->getNumBases());
+  // todo get vbases
+  for (auto Base : R->bases()) {
+    auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
+    if (!Base.isVirtual()) {
+      auto Offset = static_cast<size_t>(
+          ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
+      Bases.emplace_back(Offset, Base);
+    }
+  }
+
+  auto VisitBases =
+      [&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
+        std::sort(
+            BasesToVisit.begin(), BasesToVisit.end(),
+            [](const auto &P1, const auto &P2) { return P1.first < P2.first; });
+        for (const auto &Pair : BasesToVisit) {
+          // is it OK to use structured binding in clang? what is the language
+          // version?
+          auto Offset = Pair.first;
+          auto Base = Pair.second;
+
+          llvm::dbgs() << "visiting base at offset " << Offset << '\n';
+          // Recursively zero out base classes.
+          auto Index = SL->getElementContainingOffset(Offset);
+          Value *Idx = CGF.Builder.getSize(Index);
+          llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
+          Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
+          RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
+                                      CurrentStartOffset + Offset,
+                                      RunningOffset, WriteZeroAtOffset, false);
+        }
+      };
+
+  VisitBases(Bases);
+
+  size_t NumFields = std::distance(R->field_begin(), R->field_end());
+  std::vector<size_t> FieldOffsets;
+  FieldOffsets.reserve(NumFields);
+  auto CurrentField = R->field_begin();
+  for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
+    // Size needs to be in bytes so we can compare it later.
+    auto Offset = ASTLayout.getFieldOffset(I) / 8;
+    llvm::dbgs() << "visiting field at offset " << Offset << '\n';
+    auto Index = SL->getElementContainingOffset(Offset);
+    Value *Idx = CGF.Builder.getSize(Index);
+    llvm::Type *CurrentFieldType =
+        CGF.ConvertTypeForMem(CurrentField->getType());
+    Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
+    RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
+                                CurrentStartOffset + Offset, RunningOffset,
+                                WriteZeroAtOffset, true);
+  }
+
+  if (VisitVirtualBase) {
+
+    std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
+    VBases.reserve(R->getNumVBases());
+    for (auto VBase : R->vbases()) {
+      auto *BaseRecord =
+          cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
+      auto Offset = static_cast<size_t>(
+          ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
+      VBases.emplace_back(Offset, VBase);
+    }
+
+    VisitBases(VBases);
+  }
+}
+
+template <class T>
+void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
+                               llvm::Type *Type, ConstantArrayType const *AT,
+                               size_t CurrentStartOffset, size_t &RunningOffset,
+                               T &&WriteZeroAtOffset) {
+  llvm::dbgs() << "clear padding constant array\n";
+  for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+       ++ArrIndex) {
+
+    QualType ElementQualType = AT->getElementType();
+
+    auto *ElementRecord = ElementQualType->getAsRecordDecl();
+    if (!ElementRecord) {
+      llvm::dbgs() << "null!\n";
+    }
+    auto ElementAlign =
+        ElementRecord
+            ? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
+            : CGF.getContext().getTypeAlignInChars(ElementQualType);
+
+    Address FieldElementAddr{Ptr, Type, ElementAlign};
+
+    auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
+    auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
+    auto AllocSize =
+        CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
+    llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
+    RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
+                                CurrentStartOffset +
+                                    ArrIndex * AllocSize.getKnownMinValue(),
+                                RunningOffset, WriteZeroAtOffset, true);
+  }
+}
+
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
+                                 size_t CurrentStartOffset,
+                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
+                                 bool VisitVirtualBase) {
+
+  llvm::dbgs() << "clear padding before current  [" << RunningOffset << ", "
+               << CurrentStartOffset << ")\n";
+  for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
+    WriteZeroAtOffset(RunningOffset);
+  }
+  auto *Type = CGF.ConvertTypeForMem(Ty);
+  auto Size = CGF.CGM.getModule()
+                  .getDataLayout()
+                  .getTypeSizeInBits(Type)
+                  .getKnownMinValue() /
+              8;
+
+  if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
+    ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
+                              RunningOffset, WriteZeroAtOffset);
+  } else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
+    ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
+                       WriteZeroAtOffset, VisitVirtualBase);
+  } else if (Ty->isAtomicType()) {
+    RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
+                                CurrentStartOffset, RunningOffset,
+                                WriteZeroAtOffset, true);
+  } else {
+    llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
+                 << RunningOffset + Size << '\n';
+    RunningOffset =
+        std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
+  }
+}
+
+static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
+                                    QualType Ty) {
+  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+  auto WriteZeroAtOffset = [&](uint64_t Offset) {
+    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+    CGF.Builder.CreateAlignedStore(
+        Zero, Element,
+        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+  };
+
+  size_t RunningOffset = 0;
+
+  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
+                              true);
+
+  // Clear tail padding
+  auto *Type = CGF.ConvertTypeForMem(Ty);
+
+  auto Size = CGF.CGM.getModule()
+                  .getDataLayout()
+                  .getTypeAllocSize(Type)
+                  .getKnownMinValue();
+
+  llvm::dbgs() << "clear tail padding  [" << RunningOffset << ", " << Size
+               << ")\n";
+  for (; RunningOffset < Size; ++RunningOffset) {
+    WriteZeroAtOffset(RunningOffset);
+  }
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -4766,6 +4968,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(Ptr);
   }
+  case Builtin::BI__builtin_clear_padding: {
+    const Expr *Op = E->getArg(0);
+    Value *Address = EmitScalarExpr(Op);
+    auto PointeeTy = Op->getType()->getPointeeType();
+    RecursivelyClearPadding(*this, Address, PointeeTy);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_sub:
   case Builtin::BI__sync_fetch_and_or:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 930e9083365a1..c28ac9017a831 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2435,7 +2435,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     return BuiltinIsWithinLifetime(*this, TheCall);
   case Builtin::BI__builtin_trivially_relocate:
     return BuiltinTriviallyRelocate(*this, TheCall);
+  case Builtin::BI__builtin_clear_padding: {
+    const auto numArgs = TheCall->getNumArgs();
+    if (numArgs < 1) {
+      Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
+          << 0 /*function call*/ << "T*" << 0;
+      return ExprError();
+    }
+    if (numArgs > 1) {
+      Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
+          << 0 /*function call*/ << "T*" << numArgs << 0;
+      return ExprError();
+    }
 
+    const Expr *PtrArg = TheCall->getArg(0);
+    const QualType PtrArgType = PtrArg->getType();
+    if (!PtrArgType->isPointerType()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+          << PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
+          << "pointer";
+      return ExprError();
+    }
+    if (PtrArgType->getPointeeType().isConstQualified()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+          << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+      return ExprError();
+    }
+    if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+                            diag::err_typecheck_decl_incomplete_type))
+      return ExprError();
+    break;
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
new file mode 100644
index 0000000000000..54455e6699849
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+struct alignas(4) Foo {
+  char a;
+  alignas(2) char b;
+};
+
+struct alignas(4) Bar {
+  char c;
+  alignas(2) char d;
+};
+
+struct alignas(4) Baz : Foo {
+  char e;
+  Bar f;
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "c", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
+// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
+// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
+
+// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
+// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_5]]
+
+// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
+// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
+// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_6]]
+// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_7]]
+// CHECK: ret void
+void testBaz(Baz *baz) {
+  __builtin_clear_padding(baz);
+}
+
+struct UnsizedTail {
+  int size;
+  alignas(8) char buf[];
+
+  UnsizedTail(int size) : size(size) {}
+};
+
+// UnsizedTail structure:
+// "size", PAD_1, PAD_2, PAD_3, PAD_4
+// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
+
+// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
+// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
+// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testUnsizedTail(UnsizedTail *u) {
+  __builtin_clear_padding(u);
+}
+
+struct ArrOfStructsWithPadding {
+  Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
+// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
+// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
+// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
+// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
+  __builtin_clear_padding(arr);
+}
diff --git a/clang/test/SemaCXX/builtin-clear-padding.cpp b/clang/test/SemaCXX/builtin-clear-padding.cpp
new file mode 100644
index 0000000000000..ea87249c87b0a
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-clear-padding.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct Foo {};
+
+struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
+
+void test(int a, Foo b, void *c, int *d, Foo *e, const Foo *f, Incomplete *g) {
+  __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
+  __builtin_clear_padding(b); // expected-error {{passing 'Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('Foo' vs pointer)}}
+  __builtin_clear_padding(c); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(d); // This should not error.
+  __builtin_clear_padding(e); // This should not error.
+  __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
+  __builtin_clear_padding(g); // expected-error {{variable has incomplete type 'Incomplete'}}
+}
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
new file mode 100644
index 0000000000000..d504ac58e43ae
--- /dev/null
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -0,0 +1,807 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: c++03
+
+// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+template <class T>
+void print_bytes(const T* object) {
+  auto size                        = sizeof(T);
+  const unsigned char* const bytes = reinterpret_cast<const unsigned char*>(object);
+  size_t i;
+
+  fprintf(stderr, "[ ");
+  for (i = 0; i < size; i++) {
+    fprintf(stderr, "%02x ", bytes[i]);
+  }
+  fprintf(stderr, "]\n");
+}
+
+template <class T>
+void __builtin_clear_padding2(T t) {
+  __builtin_clear_padding(t);
+}
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) BasicWithPadding {
+  T x;
+  alignas(A2) T y;
+};
+
+template <size_t A1, size_t A2, size_t N, class T>
+struct alignas(A1) SpacedArrayMembers {
+  T x[N];
+  alignas(A2) char c;
+  T y[N];
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) PaddedPointerMembers {
+  T* x;
+  alignas(A2) T* y;
+};
+
+template <size_t A1, size_t A2, size_t A3, class T>
+struct alignas(A1) ThreeMembers {
+  T x;
+  alignas(A2) T y;
+  alignas(A3) T z;
+};
+
+template <class T>
+struct Normal {
+  T a;
+  T b;
+};
+
+template <class T>
+struct X {
+  T x;
+};
+
+template <class T>
+struct Z {
+  T z;
+};
+
+template <size_t A, class T>
+struct YZ : public Z<T> {
+  alignas(A) T y;
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) HasBase : public X<T>, public YZ<A2, T> {
+  T a;
+  alignas(A2) T b;
+};
+
+template <size_t A1, size_t A2, class T>
+void testAllStructsForType(T a, T b, T c, T d) {
+  // basic padding
+  {
+    using B = BasicWithPadding<A1, A2, T>;
+    B basic1;
+    memset(&basic1, 0, sizeof(B));
+    basic1.x = a;
+    basic1.y = b;
+    B basic2;
+    memset(&basic2, 42, sizeof(B));
+    basic2.x = a;
+    basic2.y = b;
+    assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+    __builtin_clear_padding2(&basic2);
+    assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+  }
+
+  // spaced array
+  {
+    using A = SpacedArrayMembers<A1, A2, 2, T>;
+    A arr1;
+    memset(&arr1, 0, sizeof(A));
+    arr1.x[0] = a;
+    arr1.x[1] = b;
+    arr1.y[0] = c;
+    arr1.y[1] = d;
+    A arr2;
+    memset(&arr2, 42, sizeof(A));
+    arr2.x[0] = a;
+    arr2.x[1] = b;
+    arr2.y[0] = c;
+    arr2.y[1] = d;
+    arr2.c    = 0;
+    assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+    __builtin_clear_padding2(&arr2);
+    assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+  }
+
+  // pointer members
+  {
+    using P = PaddedPointerMembers<A1, A2, T>;
+    P ptr1;
+    memset(&ptr1, 0, sizeof(P));
+    ptr1.x = &a;
+    ptr1.y = &b;
+    P ptr2;
+    memset(&ptr2, 42, sizeof(P));
+    ptr2.x = &a;
+    ptr2.y = &b;
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+    __builtin_clear_padding2(&ptr2);
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+  }
+
+  // three members
+  {
+    using Three = ThreeMembers<A1, A2, A2, T>;
+    Three three1;
+    memset(&three1, 0, sizeof(Three));
+    three1.x = a;
+    three1.y = b;
+    three1.z = c;
+    Three three2;
+    memset(&three2, 42, sizeof(Three));
+    three2.x = a;
+    three2.y = b;
+    three2.z = c;
+    __builtin_clear_padding2(&three2);
+    assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+  }
+
+  // Normal struct no padding
+  {
+    using N = Normal<T>;
+    N normal1;
+    memset(&normal1, 0, sizeof(N));
+    normal1.a = a;
+    normal1.b = b;
+    N normal2;
+    memset(&normal2, 42, sizeof(N));
+    normal2.a = a;
+    normal2.b = b;
+    __builtin_clear_padding2(&normal2);
+    assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+  }
+
+  // base class
+  {
+    using H = HasBase<A1, A2, T>;
+    H base1;
+    memset(&base1, 0, sizeof(H));
+    base1.a = a;
+    base1.b = b;
+    base1.x = c;
+    base1.y = d;
+    base1.z = a;
+    H base2;
+    memset(&base2, 42, sizeof(H));
+    base2.a = a;
+    base2.b = b;
+    base2.x = c;
+    base2.y = d;
+    base2.z = a;
+    assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+    __builtin_clear_padding2(&base2);
+    assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+  }
+}
+
+struct UnsizedTail {
+  int size;
+  alignas(8) char buf[];
+
+  UnsizedTail(int size) : size(size) {}
+};
+
+void otherStructTests() {
+  // Unsized Tail
+  {
+    const size_t size1 = sizeof(UnsizedTail) + 4;
+    char buff1[size1];
+    char buff2[size1];
+    memset(buff1, 0, size1);
+    memset(buff2, 42, size1);
+    auto* u1   = new (buff1) UnsizedTail(4);
+    u1->buf[0] = 1;
+    u1->buf[1] = 2;
+    u1->buf[2] = 3;
+    u1->buf[3] = 4;
+    auto* u2   = new (buff2) UnsizedTail(4);
+    u2->buf[0] = 1;
+    u2->buf[1] = 2;
+    u2->buf[2] = 3;
+    u2->buf[3] = 4;
+    assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+    __builtin_clear_padding2(u2);
+
+    assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+  }
+
+  // basic padding on the heap
+  {
+    using B      = BasicWithPadding<8, 4, char>;
+    auto* basic1 = new B;
+    memset(basic1, 0, sizeof(B));
+    basic1->x    = 1;
+    basic1->y    = 2;
+    auto* basic2 = new B;
+    memset(basic2, 42, sizeof(B));
+    basic2->x = 1;
+    basic2->y = 2;
+    assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+    __builtin_clear_padding2(basic2);
+    assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+    delete basic2;
+    delete basic1;
+  }
+
+  // basic padding volatile on the heap
+  {
+    using B   = BasicWithPadding<8, 4, char>;
+    B* basic3 = new B;
+    memset(basic3, 0, sizeof(B));
+    basic3->x = 1;
+    basic3->y = 2;
+    B* basic4 = new B;
+    memset(basic4, 42, sizeof(B));
+    basic4->x = 1;
+    basic4->y = 2;
+    assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+    __builtin_clear_padding2(const_cast<volatile B*>(basic4));
+    __builtin_clear_padding2(basic4);
+    assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+    delete basic4;
+    delete basic3;
+  }
+}
+
+struct Foo {
+  int x;
+  int y;
+};
+
+typedef float Float4Vec __attribute__((ext_vector_type(4)));
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+void primitiveTests() {
+  // no padding
+  {
+    int i1 = 42, i2 = 42;
+    __builtin_clear_padding2(&i1); // does nothing
+    assert(i1 == 42);
+    assert(memcmp(&i1, &i2, sizeof(int)) == 0);
+  }
+
+  // long double
+  {
+    long double d1, d2;
+    memset(&d1, 42, sizeof(long double));
+    memset(&d2, 0, sizeof(long double));
+
+    d1 = 3.0L;
+    d2 = 3.0L;
+
+    __builtin_clear_padding2(&d1);
+    assert(d1 == 3.0L);
+    assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
+  }
+}
+
+void structTests() {
+  // no_unique_address
+  {
+    struct S1 {
+      int x;
+      char c;
+    };
+
+    struct S2 {
+      [[no_unique_address]] S1 s;
+      bool b;
+    };
+
+    S2 s1, s2;
+    memset(&s1, 42, sizeof(S2));
+    memset(&s2, 0, sizeof(S2));
+
+    s1.s.x = 4;
+    s1.s.c = 'a';
+    s1.b   = true;
+    s2.s.x = 4;
+    s2.s.c = 'a';
+    s2.b   = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S2)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.s.x == 4);
+    assert(s1.s.c == 'a');
+    assert(s1.b == true);
+
+    assert(memcmp(&s1, &s2, sizeof(S2)) == 0);
+  }
+
+  // struct with long double
+  {
+    struct S {
+      long double l;
+      bool b;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.l = 3.0L;
+    s1.b = true;
+    s2.l = 3.0L;
+    s2.b = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.l == 3.0L);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // EBO
+  {
+    struct Empty {};
+    struct B {
+      int i;
+    };
+    struct S : Empty, B {
+      bool b;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.i = 4;
+    s1.b = true;
+    s2.i = 4;
+    s2.b = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.i == 4);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // padding between bases
+  {
+    struct B1 {
+      char c1;
+    };
+    struct B2 {
+      alignas(4) char c2;
+    };
+
+    struct S : B1, B2 {};
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.c1 = 'a';
+    s1.c2 = 'b';
+    s2.c1 = 'a';
+    s2.c2 = 'b';
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // padding after last base
+  {
+    struct B1 {
+      char c1;
+    };
+    struct B2 {
+      char c2;
+    };
+
+    struct S : B1, B2 {
+      alignas(4) char c3;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.c1 = 'a';
+    s1.c2 = 'b';
+    s1.c3 = 'c';
+    s2.c1 = 'a';
+    s2.c2 = 'b';
+    s2.c3 = 'c';
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(s1.c3 == 'c');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // vtable
+  {
+    struct VirtualBase {
+      unsigned int x;
+      virtual int call() { return x; };
+      virtual ~VirtualBase() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : VirtualBase, NonVirtualBase {
+      virtual int call() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x = 0xFFFFFFFF;
+    s2->x = 0xFFFFFFFF;
+    s1->y = 'a';
+    s2->y = 'a';
+    s1->z = true;
+    s2->z = true;
+    __builtin_clear_padding2(s2);
+    assert(s2->x == 0xFFFFFFFF);
+    assert(s2->y == 'a');
+    assert(s2->z == true);
+    assert(s2->call() == 5);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // multiple bases with vtable
+  {
+    struct VirtualBase1 {
+      unsigned int x1;
+      virtual int call1() { return x1; };
+      virtual ~VirtualBase1() = default;
+    };
+
+    struct VirtualBase2 {
+      unsigned int x2;
+      virtual int call2() { return x2; };
+      virtual ~VirtualBase2() = default;
+    };
+
+    struct VirtualBase3 {
+      unsigned int x3;
+      virtual int call3() { return x3; };
+      virtual ~VirtualBase3() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : VirtualBase1, VirtualBase2, NonVirtualBase, VirtualBase3 {
+      virtual int call1() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x1 = 0xFFFFFFFF;
+    s2->x1 = 0xFFFFFFFF;
+    s1->x2 = 0xFAFAFAFA;
+    s2->x2 = 0xFAFAFAFA;
+    s1->x3 = 0xAAAAAAAA;
+    s2->x3 = 0xAAAAAAAA;
+    s1->y  = 'a';
+    s2->y  = 'a';
+    s1->z  = true;
+    s2->z  = true;
+    __builtin_clear_padding2(s2);
+    assert(s2->x1 == 0xFFFFFFFF);
+    assert(s2->x2 == 0xFAFAFAFA);
+    assert(s2->x3 == 0xAAAAAAAA);
+    assert(s2->y == 'a');
+    assert(s2->z == true);
+    assert(s2->call1() == 5);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // chain of bases with virtual functions
+  {
+    struct VirtualBase1 {
+      unsigned int x1;
+      virtual int call1() { return x1; };
+      virtual ~VirtualBase1() = default;
+    };
+
+    struct VirtualBase2 : VirtualBase1 {
+      unsigned int x2;
+      virtual int call2() { return x2; };
+      virtual ~VirtualBase2() = default;
+    };
+
+    struct VirtualBase3 : VirtualBase2 {
+      unsigned int x3;
+      virtual int call3() { return x3; };
+      virtual ~VirtualBase3() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : NonVirtualBase, VirtualBase3 {
+      //virtual int call() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x1 = 0xFFFFFFFF;
+    s2->x1 = 0xFFFFFFFF;
+    s1->x2 = 0xFAFAFAFA;
+    s2->x2 = 0xFAFAFAFA;
+    s1->x3 = 0xAAAAAAAA;
+    s2->x3 = 0xAAAAAAAA;
+    s1->y  = 'a';
+    s2->y  = 'a';
+    s1->z  = true;
+    s2->z  = true;
+    __builtin_clear_padding2(s2);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // virtual inheritance
+  {
+    struct Base {
+      int x;
+    };
+    struct D1 : virtual Base {
+      int d1;
+      bool b1;
+    };
+    struct D2 : virtual Base {
+      int d2;
+      bool b2;
+    };
+
+    struct S : D1, D2 {
+      bool s;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x  = 0xFFFFFFFF;
+    s2->x  = 0xFFFFFFFF;
+    s1->d1 = 0xFAFAFAFA;
+    s2->d1 = 0xFAFAFAFA;
+    s1->d2 = 0xAAAAAAAA;
+    s2->d2 = 0xAAAAAAAA;
+    s1->b1 = true;
+    s2->b1 = true;
+    s1->b2 = true;
+    s2->b2 = true;
+    s1->s  = true;
+    s2->s  = true;
+    __builtin_clear_padding2(s2);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // bit fields
+  {
+    struct S {
+      // will usually occupy 2 bytes:
+      unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
+      unsigned char    : 2; // next 2 bits (in 1st byte) are blocked out as unused
+      unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd
+      unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte
+    };
+
+    S s1, s2;
+    memset(&s1, 0, sizeof(S));
+    memset(&s2, 42, sizeof(S));
+
+    s1.b1 = 5;
+    s2.b1 = 5;
+    s1.b2 = 27;
+    s2.b2 = 27;
+    s1.b3 = 3;
+    s2.b3 = 3;
+    __builtin_clear_padding(&s2);
+    print_bytes(&s1);
+    print_bytes(&s2);
+    //TODO
+    //assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  testAllStructsForType<32, 16, char>(11, 22, 33, 44);
+  testAllStructsForType<64, 32, char>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, volatile char>(11, 22, 33, 44);
+  testAllStructsForType<64, 32, volatile char>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, int>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, int>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, volatile int>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, volatile int>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, double>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, double>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(28)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(28)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(60)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(60)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(64)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(64)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllStructsForType<64, 32, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllStructsForType<256, 128, Float3Vec>(0, 1, 2, 3);
+  testAllStructsForType<128, 128, Float3Vec>(4, 5, 6, 7);
+  testAllStructsForType<256, 128, Float4Vec>(0, 1, 2, 3);
+  testAllStructsForType<128, 128, Float4Vec>(4, 5, 6, 7);
+
+  otherStructTests();
+}
+
+void unionTests() {
+  // different length, do not clear object repr bits of non-active member
+  {
+    union u {
+      int i;
+      char c;
+    };
+
+    u u1, u2;
+    memset(&u1, 42, sizeof(u));
+    memset(&u2, 42, sizeof(u));
+    u1.c = '4';
+    u2.c = '4';
+
+    __builtin_clear_padding2(&u1); // should have no effect
+    assert(u1.c == '4');
+
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+  }
+
+  // tail padding of longest member
+  {
+    struct s {
+      alignas(8) char c1;
+    };
+
+    union u {
+      s s1;
+      char c2;
+    };
+
+    u u1, u2;
+    memset(&u1, 42, sizeof(u));
+    memset(&u2, 0, sizeof(u));
+
+    u1.s1.c1 = '4';
+    u2.s1.c1 = '4';
+
+    assert(memcmp(&u1, &u2, sizeof(u)) != 0);
+    __builtin_clear_padding2(&u1);
+    assert(u1.s1.c1 == '4');
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+  }
+}
+
+void arrayTests() {
+  // no padding
+  {
+    int i1[2] = {1, 2};
+    int i2[2] = {1, 2};
+
+    __builtin_clear_padding2(&i1);
+    assert(i1[0] == 1);
+    assert(i1[1] == 2);
+    assert(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
+  }
+
+  // long double
+  {
+    long double d1[2], d2[2];
+    memset(&d1, 42, 2 * sizeof(long double));
+    memset(&d2, 0, 2 * sizeof(long double));
+
+    d1[0] = 3.0L;
+    d1[1] = 4.0L;
+    d2[0] = 3.0L;
+    d2[1] = 4.0L;
+
+    __builtin_clear_padding2(&d1);
+    assert(d1[0] == 3.0L);
+    assert(d2[1] == 4.0L);
+    assert(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
+  }
+
+  // struct
+  {
+    struct S {
+      int i1;
+      char c1;
+      int i2;
+      char c2;
+    };
+
+    S s1[2], s2[2];
+    memset(&s1, 42, 2 * sizeof(S));
+    memset(&s2, 0, 2 * sizeof(S));
+
+    s1[0].i1 = 1;
+    s1[0].c1 = 'a';
+    s1[0].i2 = 2;
+    s1[0].c2 = 'b';
+    s1[1].i1 = 3;
+    s1[1].c1 = 'c';
+    s1[1].i2 = 4;
+    s1[1].c2 = 'd';
+
+    s2[0].i1 = 1;
+    s2[0].c1 = 'a';
+    s2[0].i2 = 2;
+    s2[0].c2 = 'b';
+    s2[1].i1 = 3;
+    s2[1].c1 = 'c';
+    s2[1].i2 = 4;
+    s2[1].c2 = 'd';
+
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+
+    assert(s1[0].i1 == 1);
+    assert(s1[0].c1 == 'a');
+    assert(s1[0].i2 == 2);
+    assert(s1[0].c2 == 'b');
+    assert(s1[1].i1 == 3);
+    assert(s1[1].c1 == 'c');
+    assert(s1[1].i2 == 4);
+    assert(s1[1].c2 == 'd');
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
+  }
+}
+
+int main(int, const char**) {
+  primitiveTests();
+  unionTests();
+  structTests();
+  arrayTests();
+
+  return 0;
+}

>From 4db4d246dcbcec65337c3ee8c82259c9b7b9558e Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Sun, 16 Jun 2024 00:05:25 +0100
Subject: [PATCH 2/5] new approach

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 411 ++++++++++--------
 .../atomics/builtin_clear_padding.pass.cpp    | 181 ++++----
 2 files changed, 333 insertions(+), 259 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ef090b5c70889..408ca84be49cf 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -41,6 +41,9 @@
 #include <algorithm>
 #include <optional>
 #include <utility>
+#include <deque>
+#include <vector>
+#include <sstream>
 
 using namespace clang;
 using namespace CodeGen;
@@ -2557,204 +2560,254 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
   return RValue::get(CGF->Builder.CreateCall(UBF, Args));
 }
 
-template <class T>
-void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
-                                 size_t CurrentStartOffset,
-                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
-                                 bool VisitVirtualBase);
-
-template <class T>
-void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
-                        StructType *ST, size_t CurrentStartOffset,
-                        size_t &RunningOffset, T &&WriteZeroAtOffset,
-                        bool VisitVirtualBase) {
-  llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
-  const auto &DL = CGF.CGM.getModule().getDataLayout();
-  auto *SL = DL.getStructLayout(ST);
-  auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
-  if (!R) {
-    llvm::dbgs() << "Not a CXXRecordDecl\n";
-    return;
-  }
-  const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
-  if (ASTLayout.hasOwnVFPtr()) {
-    llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
-                 << RunningOffset << " to "
-                 << RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
-    RunningOffset += DL.getPointerSizeInBits() / 8;
-  }
-  std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
-  Bases.reserve(R->getNumBases());
-  // todo get vbases
-  for (auto Base : R->bases()) {
-    auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
-    if (!Base.isVirtual()) {
-      auto Offset = static_cast<size_t>(
-          ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
-      Bases.emplace_back(Offset, Base);
+namespace {
+
+struct PaddingClearer {
+  PaddingClearer(CodeGenFunction &F)
+      : CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}
+
+  void run(Value *Ptr, QualType Ty) {
+    OccuppiedIntervals.clear();
+    Queue.clear();
+
+    Queue.push_back(Data{0, Ty, true});
+    while (!Queue.empty()) {
+      auto Current = Queue.front();
+      Queue.pop_front();
+      Visit(Current);
+    }
+
+    MergeOccuppiedIntervals();
+    auto PaddingIntervals =
+        GetPaddingIntervals(CGF.getContext().getTypeSize(Ty));
+    llvm::dbgs() << "Occuppied Bits:\n";
+    for (auto [first, last] : OccuppiedIntervals) {
+      llvm::dbgs() << "[" << first << ", " << last << ")\n";
+    }
+    llvm::dbgs() << "Padding Bits:\n";
+    for (auto [first, last] : PaddingIntervals) {
+      llvm::dbgs() << "[" << first << ", " << last << ")\n";
+    }
+
+    for (const auto &Interval : PaddingIntervals) {
+      ClearPadding(Ptr, Interval);
     }
   }
 
-  auto VisitBases =
-      [&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
-        std::sort(
-            BasesToVisit.begin(), BasesToVisit.end(),
-            [](const auto &P1, const auto &P2) { return P1.first < P2.first; });
-        for (const auto &Pair : BasesToVisit) {
-          // is it OK to use structured binding in clang? what is the language
-          // version?
-          auto Offset = Pair.first;
-          auto Base = Pair.second;
-
-          llvm::dbgs() << "visiting base at offset " << Offset << '\n';
-          // Recursively zero out base classes.
-          auto Index = SL->getElementContainingOffset(Offset);
-          Value *Idx = CGF.Builder.getSize(Index);
-          llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
-          Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
-          RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
-                                      CurrentStartOffset + Offset,
-                                      RunningOffset, WriteZeroAtOffset, false);
-        }
-      };
+private:
+  struct BitInterval {
+    // [First, Last)
+    uint64_t First;
+    uint64_t Last;
+  };
 
-  VisitBases(Bases);
-
-  size_t NumFields = std::distance(R->field_begin(), R->field_end());
-  std::vector<size_t> FieldOffsets;
-  FieldOffsets.reserve(NumFields);
-  auto CurrentField = R->field_begin();
-  for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
-    // Size needs to be in bytes so we can compare it later.
-    auto Offset = ASTLayout.getFieldOffset(I) / 8;
-    llvm::dbgs() << "visiting field at offset " << Offset << '\n';
-    auto Index = SL->getElementContainingOffset(Offset);
-    Value *Idx = CGF.Builder.getSize(Index);
-    llvm::Type *CurrentFieldType =
-        CGF.ConvertTypeForMem(CurrentField->getType());
-    Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
-    RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
-                                CurrentStartOffset + Offset, RunningOffset,
-                                WriteZeroAtOffset, true);
-  }
-
-  if (VisitVirtualBase) {
-
-    std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
-    VBases.reserve(R->getNumVBases());
-    for (auto VBase : R->vbases()) {
-      auto *BaseRecord =
-          cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
-      auto Offset = static_cast<size_t>(
-          ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
-      VBases.emplace_back(Offset, VBase);
+  struct Data {
+    uint64_t StartBitOffset;
+    QualType Ty;
+    bool VisitVirtualBase;
+  };
+
+  void Visit(Data const &D) {
+    if (auto *AT = dyn_cast<ConstantArrayType>(D.Ty)) {
+      VisitArray(AT, D.StartBitOffset);
+      return;
+    }
+
+    if (auto *Record = D.Ty->getAsCXXRecordDecl()) {
+      VisitStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
+      return;
+    }
+
+    if (D.Ty->isAtomicType()) {
+      auto Unwrapped = D;
+      Unwrapped.Ty = D.Ty.getAtomicUnqualifiedType();
+      Queue.push_back(Unwrapped);
+      return;
+    }
+
+    if (const auto *Complex = D.Ty->getAs<ComplexType>()) {
+      VisitComplex(Complex, D.StartBitOffset);
+      return;
     }
 
-    VisitBases(VBases);
+    auto *Type = CGF.ConvertTypeForMem(D.Ty);
+    auto SizeBit = CGF.CGM.getModule()
+                       .getDataLayout()
+                       .getTypeSizeInBits(Type)
+                       .getKnownMinValue();
+    llvm::dbgs() << "clear_padding primitive type. adding Interval ["
+                 << D.StartBitOffset << ", " << D.StartBitOffset + SizeBit
+                 << ")\n";
+    OccuppiedIntervals.push_back(
+        BitInterval{D.StartBitOffset, D.StartBitOffset + SizeBit});
+  }
+
+  void VisitArray(const ConstantArrayType *AT, uint64_t StartBitOffset) {
+    llvm::dbgs() << "clear_padding visiting constant array starting from "
+                 << StartBitOffset << "\n";
+    for (uint64_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+         ++ArrIndex) {
+
+      QualType ElementQualType = AT->getElementType();
+      auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
+      auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
+      auto Offset = ElementSize.alignTo(ElementAlign);
+
+      Queue.push_back(
+          Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
+               ElementQualType, true});
+    }
   }
-}
 
-template <class T>
-void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
-                               llvm::Type *Type, ConstantArrayType const *AT,
-                               size_t CurrentStartOffset, size_t &RunningOffset,
-                               T &&WriteZeroAtOffset) {
-  llvm::dbgs() << "clear padding constant array\n";
-  for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
-       ++ArrIndex) {
+  void VisitStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
+                   bool VisitVirtualBase) {
+    llvm::dbgs() << "clear_padding visiting struct: "
+                 << R->getQualifiedNameAsString() << " starting from offset "
+                 << StartBitOffset << '\n';
+    const auto &DL = CGF.CGM.getModule().getDataLayout();
+
+    const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+    if (ASTLayout.hasOwnVFPtr()) {
+      llvm::dbgs()
+          << "clear_padding found vtable ptr. Adding occuppied interval ["
+          << StartBitOffset << ", "
+          << (StartBitOffset + DL.getPointerSizeInBits()) << ")\n";
+      OccuppiedIntervals.push_back(BitInterval{
+          StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
+    }
 
-    QualType ElementQualType = AT->getElementType();
+    const auto VisitBase = [&ASTLayout, StartBitOffset, this](
+                               const CXXBaseSpecifier &Base, auto GetOffset) {
+      auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
+      if (!BaseRecord) {
+        llvm::dbgs() << "Base is not a CXXRecord!\n";
+        return;
+      }
+      auto BaseOffset =
+          std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
 
-    auto *ElementRecord = ElementQualType->getAsRecordDecl();
-    if (!ElementRecord) {
-      llvm::dbgs() << "null!\n";
+      llvm::dbgs() << "visiting base at offset " << StartBitOffset << " + "
+                   << BaseOffset * CharWidth << '\n';
+      Queue.push_back(
+          Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), false});
+    };
+
+    for (auto Base : R->bases()) {
+      if (!Base.isVirtual()) {
+        VisitBase(Base, &ASTRecordLayout::getBaseClassOffset);
+      }
+    }
+
+    if (VisitVirtualBase) {
+      for (auto VBase : R->vbases()) {
+        VisitBase(VBase, &ASTRecordLayout::getVBaseClassOffset);
+      }
+    }
+
+    for (auto *Field : R->fields()) {
+      auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
+      llvm::dbgs() << "visiting field at offset " << StartBitOffset << " + "
+                   << FieldOffset << '\n';
+      if (Field->isBitField()) {
+        llvm::dbgs() << "clear_padding found bit field. Adding Interval ["
+                     << StartBitOffset + FieldOffset << " , "
+                     << FieldOffset + Field->getBitWidthValue(CGF.getContext())
+                     << ")\n";
+        OccuppiedIntervals.push_back(
+            BitInterval{StartBitOffset + FieldOffset,
+                        StartBitOffset + FieldOffset +
+                            Field->getBitWidthValue(CGF.getContext())});
+      } else {
+        Queue.push_back(
+            Data{StartBitOffset + FieldOffset, Field->getType(), true});
+      }
     }
-    auto ElementAlign =
-        ElementRecord
-            ? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
-            : CGF.getContext().getTypeAlignInChars(ElementQualType);
-
-    Address FieldElementAddr{Ptr, Type, ElementAlign};
-
-    auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
-    auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
-    auto AllocSize =
-        CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
-    llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
-    RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
-                                CurrentStartOffset +
-                                    ArrIndex * AllocSize.getKnownMinValue(),
-                                RunningOffset, WriteZeroAtOffset, true);
   }
-}
 
-template <class T>
-void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
-                                 size_t CurrentStartOffset,
-                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
-                                 bool VisitVirtualBase) {
-
-  llvm::dbgs() << "clear padding before current  [" << RunningOffset << ", "
-               << CurrentStartOffset << ")\n";
-  for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
-    WriteZeroAtOffset(RunningOffset);
-  }
-  auto *Type = CGF.ConvertTypeForMem(Ty);
-  auto Size = CGF.CGM.getModule()
-                  .getDataLayout()
-                  .getTypeSizeInBits(Type)
-                  .getKnownMinValue() /
-              8;
-
-  if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
-    ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
-                              RunningOffset, WriteZeroAtOffset);
-  } else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
-    ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
-                       WriteZeroAtOffset, VisitVirtualBase);
-  } else if (Ty->isAtomicType()) {
-    RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
-                                CurrentStartOffset, RunningOffset,
-                                WriteZeroAtOffset, true);
-  } else {
-    llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
-                 << RunningOffset + Size << '\n';
-    RunningOffset =
-        std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
+  void VisitComplex(const ComplexType *CT, uint64_t StartBitOffset) {
+    QualType ElementQualType = CT->getElementType();
+    auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
+    auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
+    auto ImgOffset = ElementSize.alignTo(ElementAlign);
+
+    llvm::dbgs() << "clear_padding visiting Complex Type. Real from "
+                 << StartBitOffset << "Img from "
+                 << StartBitOffset + ImgOffset.getQuantity() * CharWidth
+                 << "\n";
+    Queue.push_back(Data{StartBitOffset, ElementQualType, true});
+    Queue.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
+                         ElementQualType, true});
   }
-}
 
-static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
-                                    QualType Ty) {
-  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
-  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
-  auto WriteZeroAtOffset = [&](uint64_t Offset) {
-    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
-    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-    CGF.Builder.CreateAlignedStore(
-        Zero, Element,
-        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
-  };
+  void MergeOccuppiedIntervals() {
+    std::sort(OccuppiedIntervals.begin(), OccuppiedIntervals.end(),
+              [](const BitInterval &lhs, const BitInterval &rhs) {
+                return std::tie(lhs.First, lhs.Last) <
+                       std::tie(rhs.First, rhs.Last);
+              });
 
-  size_t RunningOffset = 0;
+    std::vector<BitInterval> Merged;
+    Merged.reserve(OccuppiedIntervals.size());
 
-  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
-                              true);
+    for (const BitInterval &NextInterval : OccuppiedIntervals) {
+      if (Merged.empty()) {
+        Merged.push_back(NextInterval);
+        continue;
+      }
+      auto &LastInterval = Merged.back();
 
-  // Clear tail padding
-  auto *Type = CGF.ConvertTypeForMem(Ty);
+      if (NextInterval.First > LastInterval.Last) {
+        Merged.push_back(NextInterval);
+      } else {
+        LastInterval.Last = std::max(LastInterval.Last, NextInterval.Last);
+      }
+    }
 
-  auto Size = CGF.CGM.getModule()
-                  .getDataLayout()
-                  .getTypeAllocSize(Type)
-                  .getKnownMinValue();
+    OccuppiedIntervals = Merged;
+  }
 
-  llvm::dbgs() << "clear tail padding  [" << RunningOffset << ", " << Size
-               << ")\n";
-  for (; RunningOffset < Size; ++RunningOffset) {
-    WriteZeroAtOffset(RunningOffset);
+  std::vector<BitInterval> GetPaddingIntervals(uint64_t SizeInBits) const {
+    std::vector<BitInterval> Results;
+    if (OccuppiedIntervals.size() == 1 &&
+        OccuppiedIntervals.front().First == 0 &&
+        OccuppiedIntervals.end()->Last == SizeInBits) {
+      return Results;
+    }
+    Results.reserve(OccuppiedIntervals.size() + 1);
+    uint64_t CurrentPos = 0;
+    for (const BitInterval &OccupiedInterval : OccuppiedIntervals) {
+      if (OccupiedInterval.First > CurrentPos) {
+        Results.push_back(BitInterval{CurrentPos, OccupiedInterval.First});
+      }
+      CurrentPos = OccupiedInterval.Last;
+    }
+    if (SizeInBits > CurrentPos) {
+      Results.push_back(BitInterval{CurrentPos, SizeInBits});
+    }
+    return Results;
+  }
+
+  void ClearPadding(Value *Ptr, const BitInterval &PaddingInteval) {
+    // TODO: support clearning non-one-byte clearing
+    auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+    auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+    for (auto Offset = PaddingInteval.First / CharWidth;
+         Offset < PaddingInteval.Last / CharWidth; ++Offset) {
+      auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+      auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+      CGF.Builder.CreateAlignedStore(
+          Zero, Element,
+          CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+    }
   }
-}
+
+  CodeGenFunction &CGF;
+  const uint64_t CharWidth;
+  std::deque<Data> Queue;
+  std::vector<BitInterval> OccuppiedIntervals;
+};
+
+} // namespace
 
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
@@ -4972,7 +5025,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     const Expr *Op = E->getArg(0);
     Value *Address = EmitScalarExpr(Op);
     auto PointeeTy = Op->getType()->getPointeeType();
-    RecursivelyClearPadding(*this, Address, PointeeTy);
+    PaddingClearer clearer{*this};
+    clearer.run(Address, PointeeTy);
+    //RecursivelyClearPadding(*this, Address, PointeeTy);
     return RValue::get(nullptr);
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index d504ac58e43ae..49c57b1473447 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -30,6 +30,12 @@ void print_bytes(const T* object) {
 template <class T>
 void __builtin_clear_padding2(T t) {
   __builtin_clear_padding(t);
+  (void)t;
+}
+
+void assert2(bool b){
+  assert(b);
+  (void)b;
 }
 
 template <size_t A1, size_t A2, class T>
@@ -98,9 +104,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&basic2, 42, sizeof(B));
     basic2.x = a;
     basic2.y = b;
-    assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+    assert2(memcmp(&basic1, &basic2, sizeof(B)) != 0);
     __builtin_clear_padding2(&basic2);
-    assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+    assert2(memcmp(&basic1, &basic2, sizeof(B)) == 0);
   }
 
   // spaced array
@@ -119,9 +125,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     arr2.y[0] = c;
     arr2.y[1] = d;
     arr2.c    = 0;
-    assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+    assert2(memcmp(&arr1, &arr2, sizeof(A)) != 0);
     __builtin_clear_padding2(&arr2);
-    assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+    assert2(memcmp(&arr1, &arr2, sizeof(A)) == 0);
   }
 
   // pointer members
@@ -135,9 +141,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&ptr2, 42, sizeof(P));
     ptr2.x = &a;
     ptr2.y = &b;
-    assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+    assert2(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
     __builtin_clear_padding2(&ptr2);
-    assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+    assert2(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
   }
 
   // three members
@@ -154,7 +160,7 @@ void testAllStructsForType(T a, T b, T c, T d) {
     three2.y = b;
     three2.z = c;
     __builtin_clear_padding2(&three2);
-    assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+    assert2(memcmp(&three1, &three2, sizeof(Three)) == 0);
   }
 
   // Normal struct no padding
@@ -169,7 +175,7 @@ void testAllStructsForType(T a, T b, T c, T d) {
     normal2.a = a;
     normal2.b = b;
     __builtin_clear_padding2(&normal2);
-    assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+    assert2(memcmp(&normal1, &normal2, sizeof(N)) == 0);
   }
 
   // base class
@@ -189,9 +195,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     base2.x = c;
     base2.y = d;
     base2.z = a;
-    assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+    assert2(memcmp(&base1, &base2, sizeof(H)) != 0);
     __builtin_clear_padding2(&base2);
-    assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+    assert2(memcmp(&base1, &base2, sizeof(H)) == 0);
   }
 }
 
@@ -220,10 +226,10 @@ void otherStructTests() {
     u2->buf[1] = 2;
     u2->buf[2] = 3;
     u2->buf[3] = 4;
-    assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+    assert2(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
     __builtin_clear_padding2(u2);
 
-    assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+    assert2(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
   }
 
   // basic padding on the heap
@@ -237,9 +243,9 @@ void otherStructTests() {
     memset(basic2, 42, sizeof(B));
     basic2->x = 1;
     basic2->y = 2;
-    assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+    assert2(memcmp(basic1, basic2, sizeof(B)) != 0);
     __builtin_clear_padding2(basic2);
-    assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+    assert2(memcmp(basic1, basic2, sizeof(B)) == 0);
     delete basic2;
     delete basic1;
   }
@@ -255,10 +261,10 @@ void otherStructTests() {
     memset(basic4, 42, sizeof(B));
     basic4->x = 1;
     basic4->y = 2;
-    assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+    assert2(memcmp(basic3, basic4, sizeof(B)) != 0);
     __builtin_clear_padding2(const_cast<volatile B*>(basic4));
     __builtin_clear_padding2(basic4);
-    assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+    assert2(memcmp(basic3, basic4, sizeof(B)) == 0);
     delete basic4;
     delete basic3;
   }
@@ -277,8 +283,8 @@ void primitiveTests() {
   {
     int i1 = 42, i2 = 42;
     __builtin_clear_padding2(&i1); // does nothing
-    assert(i1 == 42);
-    assert(memcmp(&i1, &i2, sizeof(int)) == 0);
+    assert2(i1 == 42);
+    assert2(memcmp(&i1, &i2, sizeof(int)) == 0);
   }
 
   // long double
@@ -291,8 +297,20 @@ void primitiveTests() {
     d2 = 3.0L;
 
     __builtin_clear_padding2(&d1);
-    assert(d1 == 3.0L);
-    assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
+    assert2(d1 == 3.0L);
+    assert2(memcmp(&d1, &d2, sizeof(long double)) == 0);
+  }
+
+  // _Complex
+  {
+    _Complex long double c1, c2;
+
+    memset(&c1, 42, sizeof(_Complex long double));
+    memset(&c2, 0, sizeof(_Complex long double));
+    c1 = 3.0L ;
+    c1 = 3.0L ;
+    __builtin_clear_padding2(&c1);
+    //TODO
   }
 }
 
@@ -320,13 +338,13 @@ void structTests() {
     s2.s.c = 'a';
     s2.b   = true;
 
-    assert(memcmp(&s1, &s2, sizeof(S2)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S2)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.s.x == 4);
-    assert(s1.s.c == 'a');
-    assert(s1.b == true);
+    assert2(s1.s.x == 4);
+    assert2(s1.s.c == 'a');
+    assert2(s1.b == true);
 
-    assert(memcmp(&s1, &s2, sizeof(S2)) == 0);
+    assert2(memcmp(&s1, &s2, sizeof(S2)) == 0);
   }
 
   // struct with long double
@@ -345,11 +363,11 @@ void structTests() {
     s2.l = 3.0L;
     s2.b = true;
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.l == 3.0L);
-    assert(s1.b == true);
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.l == 3.0L);
+    assert2(s1.b == true);
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // EBO
@@ -371,11 +389,11 @@ void structTests() {
     s2.i = 4;
     s2.b = true;
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.i == 4);
-    assert(s1.b == true);
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.i == 4);
+    assert2(s1.b == true);
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // padding between bases
@@ -398,11 +416,11 @@ void structTests() {
     s2.c1 = 'a';
     s2.c2 = 'b';
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.c1 == 'a');
-    assert(s1.c2 == 'b');
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.c1 == 'a');
+    assert2(s1.c2 == 'b');
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // padding after last base
@@ -429,12 +447,12 @@ void structTests() {
     s2.c2 = 'b';
     s2.c3 = 'c';
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.c1 == 'a');
-    assert(s1.c2 == 'b');
-    assert(s1.c3 == 'c');
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.c1 == 'a');
+    assert2(s1.c2 == 'b');
+    assert2(s1.c3 == 'c');
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // vtable
@@ -469,11 +487,11 @@ void structTests() {
     s1->z = true;
     s2->z = true;
     __builtin_clear_padding2(s2);
-    assert(s2->x == 0xFFFFFFFF);
-    assert(s2->y == 'a');
-    assert(s2->z == true);
-    assert(s2->call() == 5);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(s2->x == 0xFFFFFFFF);
+    assert2(s2->y == 'a');
+    assert2(s2->z == true);
+    assert2(s2->call() == 5);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // multiple bases with vtable
@@ -524,13 +542,13 @@ void structTests() {
     s1->z  = true;
     s2->z  = true;
     __builtin_clear_padding2(s2);
-    assert(s2->x1 == 0xFFFFFFFF);
-    assert(s2->x2 == 0xFAFAFAFA);
-    assert(s2->x3 == 0xAAAAAAAA);
-    assert(s2->y == 'a');
-    assert(s2->z == true);
-    assert(s2->call1() == 5);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(s2->x1 == 0xFFFFFFFF);
+    assert2(s2->x2 == 0xFAFAFAFA);
+    assert2(s2->x3 == 0xAAAAAAAA);
+    assert2(s2->y == 'a');
+    assert2(s2->z == true);
+    assert2(s2->call1() == 5);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // chain of bases with virtual functions
@@ -580,7 +598,7 @@ void structTests() {
     s1->z  = true;
     s2->z  = true;
     __builtin_clear_padding2(s2);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // virtual inheritance
@@ -621,7 +639,7 @@ void structTests() {
     s1->s  = true;
     s2->s  = true;
     __builtin_clear_padding2(s2);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // bit fields
@@ -644,11 +662,12 @@ void structTests() {
     s2.b2 = 27;
     s1.b3 = 3;
     s2.b3 = 3;
-    __builtin_clear_padding(&s2);
-    print_bytes(&s1);
-    print_bytes(&s2);
+    __builtin_clear_padding2(&s2);
+    //print_bytes(&s1);
+    //print_bytes(&s2);
+    //assert(false);
     //TODO
-    //assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    //assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   testAllStructsForType<32, 16, char>(11, 22, 33, 44);
@@ -692,9 +711,9 @@ void unionTests() {
     u2.c = '4';
 
     __builtin_clear_padding2(&u1); // should have no effect
-    assert(u1.c == '4');
+    assert2(u1.c == '4');
 
-    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+    assert2(memcmp(&u1, &u2, sizeof(u)) == 0);
   }
 
   // tail padding of longest member
@@ -715,10 +734,10 @@ void unionTests() {
     u1.s1.c1 = '4';
     u2.s1.c1 = '4';
 
-    assert(memcmp(&u1, &u2, sizeof(u)) != 0);
+    assert2(memcmp(&u1, &u2, sizeof(u)) != 0);
     __builtin_clear_padding2(&u1);
-    assert(u1.s1.c1 == '4');
-    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+    assert2(u1.s1.c1 == '4');
+    assert2(memcmp(&u1, &u2, sizeof(u)) == 0);
   }
 }
 
@@ -729,9 +748,9 @@ void arrayTests() {
     int i2[2] = {1, 2};
 
     __builtin_clear_padding2(&i1);
-    assert(i1[0] == 1);
-    assert(i1[1] == 2);
-    assert(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
+    assert2(i1[0] == 1);
+    assert2(i1[1] == 2);
+    assert2(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
   }
 
   // long double
@@ -746,9 +765,9 @@ void arrayTests() {
     d2[1] = 4.0L;
 
     __builtin_clear_padding2(&d1);
-    assert(d1[0] == 3.0L);
-    assert(d2[1] == 4.0L);
-    assert(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
+    assert2(d1[0] == 3.0L);
+    assert2(d2[1] == 4.0L);
+    assert2(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
   }
 
   // struct
@@ -782,18 +801,18 @@ void arrayTests() {
     s2[1].i2 = 4;
     s2[1].c2 = 'd';
 
-    assert(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
 
-    assert(s1[0].i1 == 1);
-    assert(s1[0].c1 == 'a');
-    assert(s1[0].i2 == 2);
-    assert(s1[0].c2 == 'b');
-    assert(s1[1].i1 == 3);
-    assert(s1[1].c1 == 'c');
-    assert(s1[1].i2 == 4);
-    assert(s1[1].c2 == 'd');
-    assert(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
+    assert2(s1[0].i1 == 1);
+    assert2(s1[0].c1 == 'a');
+    assert2(s1[0].i2 == 2);
+    assert2(s1[0].c2 == 'b');
+    assert2(s1[1].i1 == 3);
+    assert2(s1[1].c1 == 'c');
+    assert2(s1[1].i2 == 4);
+    assert2(s1[1].c2 == 'd');
+    assert2(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
   }
 }
 

>From 5af4c28ffd10fe730664b74bad9751c455356e75 Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Sun, 23 Jun 2024 19:33:23 +0100
Subject: [PATCH 3/5] handle bit field

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 80 ++++++++++++++++---
 .../atomics/builtin_clear_padding.pass.cpp    | 26 +++---
 2 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 408ca84be49cf..2ce895b3b0ac1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2787,20 +2787,76 @@ struct PaddingClearer {
     return Results;
   }
 
-  void ClearPadding(Value *Ptr, const BitInterval &PaddingInteval) {
-    // TODO: support clearning non-one-byte clearing
-    auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
-    auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
-    for (auto Offset = PaddingInteval.First / CharWidth;
-         Offset < PaddingInteval.Last / CharWidth; ++Offset) {
-      auto *Index = ConstantInt::get(CGF.IntTy, Offset);
-      auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-      CGF.Builder.CreateAlignedStore(
-          Zero, Element,
-          CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
-    }
+
+
+  void ClearPadding(Value *Ptr, const BitInterval &PaddingInterval) {
+      auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+      auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+  
+      // Calculate byte indices and bit positions
+      auto StartByte = PaddingInterval.First / CharWidth;
+      auto StartBit = PaddingInterval.First % CharWidth;
+      auto EndByte = PaddingInterval.Last / CharWidth;
+      auto EndBit = PaddingInterval.Last % CharWidth;
+  
+      if (StartByte == EndByte) {
+          // Interval is within a single byte
+          auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+          auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+          Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+  
+          auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+  
+          // Create mask to clear bits within the byte
+          uint8_t mask = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
+          auto *MaskValue = ConstantInt::get(CGF.Int8Ty, mask);
+          auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+  
+          CGF.Builder.CreateStore(NewValue, ElementAddr);
+      } else {
+          // Handle the start byte
+          if (StartBit != 0) {
+              auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+  
+              auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+  
+              uint8_t startMask = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
+              auto *MaskValue = ConstantInt::get(CGF.Int8Ty, ~startMask);
+              auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+  
+              CGF.Builder.CreateStore(NewValue, ElementAddr);
+              ++StartByte;
+          }
+  
+          // Handle full bytes in the middle
+          for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
+              auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+  
+              CGF.Builder.CreateStore(Zero, ElementAddr);
+          }
+  
+          // Handle the end byte
+          if (EndBit != 0) {
+              auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
+              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+  
+              auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+  
+              uint8_t endMask = (1 << EndBit) - 1;
+              auto *MaskValue = ConstantInt::get(CGF.Int8Ty, endMask);
+              auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+  
+              CGF.Builder.CreateStore(NewValue, ElementAddr);
+          }
+      }
   }
 
+
   CodeGenFunction &CGF;
   const uint64_t CharWidth;
   std::deque<Data> Queue;
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 49c57b1473447..ec220088ac1be 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -29,12 +29,12 @@ void print_bytes(const T* object) {
 
 template <class T>
 void __builtin_clear_padding2(T t) {
-  __builtin_clear_padding(t);
+  //__builtin_clear_padding(t);
   (void)t;
 }
 
 void assert2(bool b){
-  assert(b);
+  //assert(b);
   (void)b;
 }
 
@@ -647,9 +647,9 @@ void structTests() {
     struct S {
       // will usually occupy 2 bytes:
       unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
-      unsigned char    : 2; // next 2 bits (in 1st byte) are blocked out as unused
-      unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd
-      unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte
+      unsigned char b2 : 2; // next 2 bits (in 1st byte) are blocked out as unused
+      unsigned char b3 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd
+      unsigned char b4 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte
     };
 
     S s1, s2;
@@ -658,16 +658,14 @@ void structTests() {
 
     s1.b1 = 5;
     s2.b1 = 5;
-    s1.b2 = 27;
-    s2.b2 = 27;
-    s1.b3 = 3;
-    s2.b3 = 3;
+    s1.b2 = 3;
+    s2.b2 = 3;
+    s1.b3 = 27;
+    s2.b3 = 27;
+    s1.b4 = 3;
+    s2.b4 = 3;
     __builtin_clear_padding2(&s2);
-    //print_bytes(&s1);
-    //print_bytes(&s2);
-    //assert(false);
-    //TODO
-    //assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   testAllStructsForType<32, 16, char>(11, 22, 33, 44);

>From 2bf78ca0c915060bbfca768c586a49a656851292 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 25 May 2025 09:38:34 +0100
Subject: [PATCH 4/5] rebase

---
 clang/include/clang/Basic/Builtins.td | 1 +
 clang/lib/CodeGen/CGBuiltin.cpp       | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 406cb2ac179ef..5e777367e20c3 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -968,6 +968,7 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_is_within_lifetime"];
   let Attributes = [NoThrow, CustomTypeChecking, Consteval];
   let Prototype = "bool(void*)";
+}
 
 def ClearPadding : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_clear_padding"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2ce895b3b0ac1..ef15475baf47c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2711,12 +2711,12 @@ struct PaddingClearer {
       if (Field->isBitField()) {
         llvm::dbgs() << "clear_padding found bit field. Adding Interval ["
                      << StartBitOffset + FieldOffset << " , "
-                     << FieldOffset + Field->getBitWidthValue(CGF.getContext())
+                     << FieldOffset + Field->getBitWidthValue()
                      << ")\n";
         OccuppiedIntervals.push_back(
             BitInterval{StartBitOffset + FieldOffset,
                         StartBitOffset + FieldOffset +
-                            Field->getBitWidthValue(CGF.getContext())});
+                            Field->getBitWidthValue()});
       } else {
         Queue.push_back(
             Data{StartBitOffset + FieldOffset, Field->getType(), true});

>From 6b97840d76f00f89b8f07bfb73e3d281816bc9d5 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 31 May 2025 11:38:29 +0100
Subject: [PATCH 5/5] review feedback

---
 clang/include/clang/Basic/Builtins.td         |   2 +-
 clang/lib/CodeGen/CGBuiltin.cpp               |  35 +-
 .../builtin-clear-padding-codegen.cpp         | 800 ++++++++++++++++--
 3 files changed, 760 insertions(+), 77 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 5e777367e20c3..eafe0642652a2 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -972,7 +972,7 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
 
 def ClearPadding : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_clear_padding"];
-  let Attributes = [NoThrow];
+  let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "void(void*)";
 }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ef15475baf47c..1c41619b3c160 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2562,6 +2562,14 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
 
 namespace {
 
+
+// PaddingClearer is a utility class that clears padding bits in a
+// c++ type. It traverses the type recursively, collecting occupied
+// bit intervals, and then compute the padding intervals.
+// In the end, it clears the padding bits by writing zeros
+// to the padding intervals bytes-by-bytes. If a byte only contains
+// some padding bits, it writes zeros to only those bits. This is
+// the case for bit-fields.
 struct PaddingClearer {
   PaddingClearer(CodeGenFunction &F)
       : CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}
@@ -2572,8 +2580,8 @@ struct PaddingClearer {
 
     Queue.push_back(Data{0, Ty, true});
     while (!Queue.empty()) {
-      auto Current = Queue.front();
-      Queue.pop_front();
+      auto Current = Queue.back();
+      Queue.pop_back();
       Visit(Current);
     }
 
@@ -2655,7 +2663,7 @@ struct PaddingClearer {
 
       Queue.push_back(
           Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
-               ElementQualType, true});
+               ElementQualType, /*VisitVirtualBase*/true});
     }
   }
 
@@ -2688,8 +2696,8 @@ struct PaddingClearer {
 
       llvm::dbgs() << "visiting base at offset " << StartBitOffset << " + "
                    << BaseOffset * CharWidth << '\n';
-      Queue.push_back(
-          Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), false});
+      Queue.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
+                           Base.getType(), /*VisitVirtualBase*/ false});
     };
 
     for (auto Base : R->bases()) {
@@ -2718,8 +2726,8 @@ struct PaddingClearer {
                         StartBitOffset + FieldOffset +
                             Field->getBitWidthValue()});
       } else {
-        Queue.push_back(
-            Data{StartBitOffset + FieldOffset, Field->getType(), true});
+        Queue.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
+                             /*VisitVirtualBase*/ true});
       }
     }
   }
@@ -2734,9 +2742,10 @@ struct PaddingClearer {
                  << StartBitOffset << "Img from "
                  << StartBitOffset + ImgOffset.getQuantity() * CharWidth
                  << "\n";
-    Queue.push_back(Data{StartBitOffset, ElementQualType, true});
+    Queue.push_back(
+        Data{StartBitOffset, ElementQualType, /*VisitVirtualBase*/ true});
     Queue.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
-                         ElementQualType, true});
+                         ElementQualType, /*VisitVirtualBase*/ true});
   }
 
   void MergeOccuppiedIntervals() {
@@ -5078,12 +5087,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Ptr);
   }
   case Builtin::BI__builtin_clear_padding: {
-    const Expr *Op = E->getArg(0);
-    Value *Address = EmitScalarExpr(Op);
-    auto PointeeTy = Op->getType()->getPointeeType();
+    Address Src = EmitPointerWithAlignment(E->getArg(0));
+    auto PointeeTy = E->getArg(0)->getType()->getPointeeType();
     PaddingClearer clearer{*this};
-    clearer.run(Address, PointeeTy);
-    //RecursivelyClearPadding(*this, Address, PointeeTy);
+    clearer.run(Src.getBasePointer(), PointeeTy);
     return RValue::get(nullptr);
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index 54455e6699849..caea427d1d4dd 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -1,4 +1,158 @@
-// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+
+struct Empty {};
+
+// CHECK-LABEL: define dso_local void @_Z9testEmptyP5Empty(
+// CHECK-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    ret void
+//
+void testEmpty(Empty *e) {
+  // This should clear the one byte that Emtpy occupies.
+  __builtin_clear_padding(e);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
+// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void testPrimitiveNoPadding(int *i) {
+  // This should not clear any padding, since int has no padding.
+  __builtin_clear_padding(i);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
+// CHECK-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPrimitiveLongDouble(long double *ld) {
+  // padding [10, 15] on x86
+  __builtin_clear_padding(ld);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
+// CHECK-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPrimitiveComplexLongDouble(_Complex long double *c) {
+  // padding [10, 15] and [26, 31] on x86
+  __builtin_clear_padding(c);
+}
+
+union U1 {
+  int i;
+  char c;
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
+// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void testUnionDifferentLength(U1 *u) {
+  // This should not clear the object representation bits of the non-active member.
+  __builtin_clear_padding(u);
+}
+
+
+struct S {
+  alignas(8) char c1;
+};
+
+union U2 {
+  S s1;
+  char c2;
+};
+
+// CHECK-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
+// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    ret void
+//
+void testUnionTailPaddingOfLongestMember(U2 *u) {
+  // This should clear the tail padding of the longest member.
+  // [1 - 7]
+  __builtin_clear_padding(u);
+}
+
 
 struct alignas(4) Foo {
   char a;
@@ -21,34 +175,57 @@ struct alignas(4) Baz : Foo {
 // %struct.Foo = type { i8, i8, i8, i8 }
 // %struct.Bar = type { i8, i8, i8, i8 }
 
-// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
-// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
-// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
-// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
-// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
-
-// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
-// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
-// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_1]]
-// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
-// CHECK: store i8 0, i8* [[PAD_2]]
-
-// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
-// CHECK: store i8 0, i8* [[PAD_3]]
-// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
-// CHECK: store i8 0, i8* [[PAD_4]]
-// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
-// CHECK: store i8 0, i8* [[PAD_5]]
-
-// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
-// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
-// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_6]]
-// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
-// CHECK: store i8 0, i8* [[PAD_7]]
-// CHECK: ret void
-void testBaz(Baz *baz) {
+// CHECK-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
+// CHECK-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructPaddingInBetweenMembers(Baz *baz) {
+  // this should clear all the padding in between various members
+  __builtin_clear_padding(baz);
+}
+
+// CHECK-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
+// CHECK-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructVolatile(volatile Baz *baz) {
+  // this should clear all the padding in between various members
   __builtin_clear_padding(baz);
 }
 
@@ -63,24 +240,525 @@ struct UnsizedTail {
 // "size", PAD_1, PAD_2, PAD_3, PAD_4
 // %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
 
-// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
-// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
-// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
-// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
-// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
-// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
-// CHECK: store i8 0, i8* [[PAD_1]]
-// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
-// CHECK: store i8 0, i8* [[PAD_2]]
-// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
-// CHECK: store i8 0, i8* [[PAD_3]]
-// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
-// CHECK: store i8 0, i8* [[PAD_4]]
-// CHECK: ret void
-void testUnsizedTail(UnsizedTail *u) {
+// CHECK-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
+// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructUnsizedTail(UnsizedTail *u) {
   __builtin_clear_padding(u);
 }
 
+class S1 {
+  int x;
+  char c;
+};
+
+class S2 {
+  [[no_unique_address]] S1 s1;
+  bool b;
+};
+
+// CHECK-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testNoUniqueAddress(S2 *s) {
+  // "x [0-3]",  "c" , "b", PAD [6-7]
+  __builtin_clear_padding(s);
+}
+
+struct S3 {
+  long double l;
+  bool b;
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// CHECK-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// CHECK-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// CHECK-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// CHECK-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// CHECK-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// CHECK-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructWithLongDouble(S3 *s) {
+  // "long double data[0-9]", PAD [10-15], "b", PAD [17-31]
+  __builtin_clear_padding(s);
+}
+
+struct B {
+  int i;
+};
+struct S4 : Empty, B {
+  bool b;
+};
+
+// CHECK-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructWithEmptyBase(S4 *s) {
+  // "i" [0-3], "b" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+struct B1 {
+  char c1;
+};
+struct B2 {
+  alignas(4) char c2;
+};
+struct S5 : B1, B2 {
+};
+
+// CHECK-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPaddingBetweenBases(S5 *s) {
+  // "c1" [0], PAD [1-3] , "c2" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+struct B3 {
+  char c1;
+};
+
+struct B4 {
+  char c2;
+};
+
+struct S6 : B3, B4 {
+  alignas(4) char c3;
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPaddingAfterLastBase(S6 *s) {
+  // "c1"[0], "c2"[1], PAD [2-3], "c3" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+
+struct VirtualBase {
+  unsigned int x;
+  virtual int call() { return x; };
+  virtual ~VirtualBase() = default;
+};
+
+struct NonVirtualBase {
+  char y;
+};
+
+struct S7 : VirtualBase, NonVirtualBase {
+  virtual int call() override { return 5; }
+  bool z;
+};
+
+// CHECK-LABEL: define dso_local void @_Z10testVtableP2S7(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testVtable(S7 *s) {
+  // "vtable ptr" [0-7], "x" [8-11], "y" [12], "z" [13], PAD [14-15]
+  __builtin_clear_padding(s);
+}
+
+struct VirtualBase1 {
+  unsigned int x1;
+  virtual int call1() { return x1; };
+  virtual ~VirtualBase1() = default;
+};
+
+struct VirtualBase2 {
+  unsigned int x2;
+  virtual int call2() { return x2; };
+  virtual ~VirtualBase2() = default;
+};
+
+struct VirtualBase3 {
+  unsigned int x3;
+  virtual int call3() { return x3; };
+  virtual ~VirtualBase3() = default;
+};
+
+struct NonVirtualBase1 {
+  char y;
+};
+struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 {
+  virtual int call1() override { return 5; }
+  bool z;
+};
+
+// CHECK-LABEL: define dso_local void @_Z23testMultipleBasesVtableP2S8(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    ret void
+//
+void testMultipleBasesVtable(S8 *s) {
+  // "vtable ptr" [0-7], "x1" [8-11], PAD "[12-15]",
+  // "vtable ptr" [16-23], "x2" [24-27], "y" [28], PAD "[29-31]",
+  // "vtable_ptr" [32-39], "x3" [40-43], "z" [44], PAD [45-47]
+  __builtin_clear_padding(s);
+}
+
+struct VirtualChain1 {
+  unsigned int x1;
+  virtual int call1() { return x1; };
+  virtual ~VirtualChain1() = default;
+};
+
+struct VirtualChain2 : VirtualChain1 {
+  unsigned int x2;
+  virtual int call2() { return x2; };
+  virtual ~VirtualChain2() = default;
+};
+
+struct VirtualChain3 : VirtualChain2 {
+  unsigned int x3;
+  virtual int call3() { return x3; };
+  virtual ~VirtualChain3() = default;
+};
+
+struct NonVirtualBase2 {
+  char y;
+};
+
+struct S9 : NonVirtualBase2, VirtualChain3 {
+  bool z;
+};
+
+// CHECK-LABEL: define dso_local void @_Z16testVirtualChainP2S9(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testVirtualChain(S9 *s) {
+  // This should clear the padding after the bool z.
+  // base reordered
+  // "vtable ptr" [0-7],  "x1" [8-11], x2 [12-15], x3 [16-19],
+  // y [20], z [21], PAD [22-23]
+  __builtin_clear_padding(s);
+}
+
+
+struct Base {
+  int x;
+};
+
+struct D1 : virtual Base {
+  int d1;
+  bool b1;
+};
+struct D2 : virtual Base {
+  int d2;
+  bool b2;
+};
+
+struct S10 : D1, D2 {
+  bool s;
+};
+
+// CHECK-LABEL: define dso_local void @_Z22testVirtualInheritanceP3S10(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    ret void
+//
+void testVirtualInheritance(S10 *s) {
+  // note derived member placed before the virtual base
+  // "vtable ptr" [0-7],  "d1" [8-11], "b1" [12], PAD [13-15],
+  // "vtable ptr" [16-23],  "d2" [24-27], "b2" [28], s [29],  PAD [30-31],
+  // "x" [32-35], PAD [36-39]
+  __builtin_clear_padding(s);
+}
+
+struct S11 {
+  // will usually occupy 2 bytes:
+  unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
+  unsigned char b2 : 2; // next 2 bits (in 1st byte). The rest bits in byte 1 are unused
+  unsigned char b3 : 6; // 6 bits for b3 - doesn't fit into the 1st byte => starts a 2nd
+  unsigned char b4 : 2; // 2 bits for b4 - next (and final) bits in the 2nd byte
+};
+
+// CHECK-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// CHECK-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// CHECK-NEXT:    ret void
+//
+void testBitFields(S11 *s) {
+  // "b1" [0-2], "b2" [3-4], PAD [5-7], "b3" [8-13], "b4" [14-15]
+  // to clear 5-7, we should AND 0b00011111 (31)
+  __builtin_clear_padding(s);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// CHECK-NEXT:    ret void
+//
+void testArrayNoPadding() {
+  int i[4];
+  // there is no padding in the array.
+  __builtin_clear_padding(&i);
+}
+
+// CHECK-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
+// CHECK-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// CHECK-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    ret void
+//
+void testArrayLongDouble(long double (&arr)[2]) {
+  // long double 0, [0-9] PAD [10-15]
+  // long double 1, [16-25] PAD [26-31]
+  long double ld[2];
+  __builtin_clear_padding(&ld);
+}
+
+// CHECK-LABEL: define dso_local void @_Z17testArrayOfStructv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S:%.*]] = alloca [2 x %struct.S.0], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    ret void
+//
+void testArrayOfStruct() {
+  struct S {
+    int i1;
+    char c1;
+    int i2;
+    char c2;
+  };
+
+  // S[0].i1 [0-3], S[0].c1 [4], PAD [5-7],
+  // S[0].i2 [8-11], S[0].c2 [12], PAD [13-15],
+  // S[1].i1 [16-19], S[1].c1 [20], PAD [21-23],
+  // S[1].i2 [24-27], S[1].c2 [28], PAD [29-31]
+
+  S s[2];
+  __builtin_clear_padding(&s);
+}
+
 struct ArrOfStructsWithPadding {
   Bar bars[2];
 };
@@ -89,24 +767,22 @@ struct ArrOfStructsWithPadding {
 // "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
 // %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
 
-// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
-// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
-// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
-// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
-// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
-// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
-// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
-// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_1]]
-// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
-// CHECK: store i8 0, i8* [[PAD_2]]
-// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
-// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
-// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_3]]
-// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
-// CHECK: store i8 0, i8* [[PAD_4]]
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
+// CHECK-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    ret void
+//
 void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
   __builtin_clear_padding(arr);
 }



More information about the libcxx-commits mailing list