[libcxx-commits] [clang] [libcxx] [clang] Add builtin to clear padding bytes (prework for P0528R3) (PR #75371)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat May 2 02:46:50 PDT 2026


https://github.com/huixie90 updated https://github.com/llvm/llvm-project/pull/75371

>From 3987599c9d88e7da142c28e223aad3bb72b50dd3 Mon Sep 17 00:00:00 2001
From: zoecarver <z.zoelec2 at gmail.com>
Date: Sat, 2 Dec 2023 20:00:30 +0000
Subject: [PATCH 01/22] [Builtin] Add __builtin_clear_padding

Adds `__builtin_clear_padding` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible).

I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans as follow-up patches.

Differential Revision: https://reviews.llvm.org/D87974

overlapping subobjects + opaque pointer

union, rename, scalar types
---
 clang/include/clang/Basic/Builtins.td         |   5 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 209 +++++
 clang/lib/Sema/SemaChecking.cpp               |  30 +
 .../builtin-clear-padding-codegen.cpp         | 112 +++
 clang/test/SemaCXX/builtin-clear-padding.cpp  |  15 +
 .../atomics/builtin_clear_padding.pass.cpp    | 807 ++++++++++++++++++
 6 files changed, 1178 insertions(+)
 create mode 100644 clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
 create mode 100644 clang/test/SemaCXX/builtin-clear-padding.cpp
 create mode 100644 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 4a7eaeb3d353e..e881fda614330 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1261,6 +1261,12 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_is_within_lifetime"];
   let Attributes = [NoThrow, CustomTypeChecking, Consteval];
   let Prototype = "bool(void*)";
+}
+
+def ClearPadding : LangBuiltin<"CXX_LANG"> {
+  let Spellings = ["__builtin_clear_padding"];
+  let Attributes = [NoThrow];
+  let Prototype = "void(void*)";
 }
 
 def GetVtablePointer : LangBuiltin<"CXX_LANG"> {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67de2a34f44ea..492d4a5674254 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -35,6 +35,9 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/TargetParser/AArch64TargetParser.h"
+#include "llvm/TargetParser/X86TargetParser.h"
+#include <algorithm>
 #include <optional>
 #include <utility>
 
@@ -2698,6 +2701,205 @@ RValue CodeGenFunction::emitStdcFirstBit(const CallExpr *E, Intrinsic::ID IntID,
   return RValue::get(Result);
 }
 
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
+                                 size_t CurrentStartOffset,
+                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
+                                 bool VisitVirtualBase);
+
+/// Clear the padding inside the record \p Ty, which starts CurrentStartOffset
+/// bytes into the outermost object. Subobjects are visited in this order: the
+/// record's own vtable pointer (stepped over), non-virtual bases sorted by
+/// offset, fields in declaration order and, only when \p VisitVirtualBase is
+/// set, virtual bases sorted by offset. \p RunningOffset is advanced past
+/// every byte that has been visited.
+template <class T>
+void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
+                        StructType *ST, size_t CurrentStartOffset,
+                        size_t &RunningOffset, T &&WriteZeroAtOffset,
+                        bool VisitVirtualBase) {
+  const auto &DL = CGF.CGM.getModule().getDataLayout();
+  auto *SL = DL.getStructLayout(ST);
+  auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
+  // Records that are not C++ records are left untouched.
+  if (!R)
+    return;
+
+  const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+  // A vtable pointer introduced by this record must not be zeroed: advance
+  // RunningOffset by one pointer so those bytes are neither cleared here nor
+  // mistaken for padding by a later visit.
+  if (ASTLayout.hasOwnVFPtr())
+    RunningOffset += DL.getPointerSizeInBits() / 8;
+
+  using OffsetAndBase = std::pair<size_t, CXXBaseSpecifier>;
+
+  // Recurse into each base in increasing offset order. Bases are entered with
+  // VisitVirtualBase == false, so virtual bases are only ever visited by the
+  // record that was itself entered with VisitVirtualBase == true.
+  auto VisitBases = [&](std::vector<OffsetAndBase> &BasesToVisit) {
+    std::sort(
+        BasesToVisit.begin(), BasesToVisit.end(),
+        [](const auto &P1, const auto &P2) { return P1.first < P2.first; });
+    for (const auto &[Offset, Base] : BasesToVisit) {
+      // NOTE(review): this GEP indexes Ptr by the LLVM struct element index
+      // using the base's type as the element type -- confirm that this is the
+      // intended address of the base subobject.
+      auto Index = SL->getElementContainingOffset(Offset);
+      Value *Idx = CGF.Builder.getSize(Index);
+      llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
+      Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
+      RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
+                                  CurrentStartOffset + Offset, RunningOffset,
+                                  WriteZeroAtOffset, false);
+    }
+  };
+
+  // Non-virtual direct bases; virtual ones are collected at the end.
+  std::vector<OffsetAndBase> Bases;
+  Bases.reserve(R->getNumBases());
+  for (const CXXBaseSpecifier &Base : R->bases()) {
+    if (Base.isVirtual())
+      continue;
+    auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
+    auto Offset = static_cast<size_t>(
+        ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
+    Bases.emplace_back(Offset, Base);
+  }
+  VisitBases(Bases);
+
+  // Fields, in declaration order.
+  for (const FieldDecl *Field : R->fields()) {
+    // Field offsets are reported in bits; convert to bytes so they can be
+    // compared against RunningOffset.
+    auto Offset = ASTLayout.getFieldOffset(Field->getFieldIndex()) / 8;
+    auto Index = SL->getElementContainingOffset(Offset);
+    Value *Idx = CGF.Builder.getSize(Index);
+    llvm::Type *CurrentFieldType = CGF.ConvertTypeForMem(Field->getType());
+    Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
+    RecursivelyClearPaddingImpl(CGF, Element, Field->getType(),
+                                CurrentStartOffset + Offset, RunningOffset,
+                                WriteZeroAtOffset, true);
+  }
+
+  // Virtual bases are visited last, and only when the caller asked for them
+  // through VisitVirtualBase.
+  if (!VisitVirtualBase)
+    return;
+
+  std::vector<OffsetAndBase> VBases;
+  VBases.reserve(R->getNumVBases());
+  for (const CXXBaseSpecifier &VBase : R->vbases()) {
+    auto *BaseRecord = cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
+    auto Offset = static_cast<size_t>(
+        ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
+    VBases.emplace_back(Offset, VBase);
+  }
+  VisitBases(VBases);
+}
+
+/// Clear the padding inside every element of the constant-size array \p AT.
+/// The array's first element starts CurrentStartOffset bytes into the
+/// outermost object; each element is handed to RecursivelyClearPaddingImpl.
+template <class T>
+void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
+                               llvm::Type *Type, ConstantArrayType const *AT,
+                               size_t CurrentStartOffset, size_t &RunningOffset,
+                               T &&WriteZeroAtOffset) {
+  // Everything up to the loop depends only on the element type, so it is
+  // computed once rather than on every iteration.
+  QualType ElementQualType = AT->getElementType();
+  auto *ElementRecord = ElementQualType->getAsRecordDecl();
+  // Record elements use their record-layout alignment, others the type's.
+  auto ElementAlign =
+      ElementRecord
+          ? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
+          : CGF.getContext().getTypeAlignInChars(ElementQualType);
+  Address FieldElementAddr{Ptr, Type, ElementAlign};
+  auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
+  // Distance in bytes between the starts of two consecutive elements.
+  auto AllocSize = CGF.CGM.getModule()
+                       .getDataLayout()
+                       .getTypeAllocSize(ElementType)
+                       .getKnownMinValue();
+  const uint64_t NumElements = AT->getSize().getLimitedValue();
+
+  for (size_t ArrIndex = 0; ArrIndex < NumElements; ++ArrIndex) {
+    auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
+    RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
+                                CurrentStartOffset + ArrIndex * AllocSize,
+                                RunningOffset, WriteZeroAtOffset, true);
+  }
+}
+
+/// Visit the subobject of type \p Ty located CurrentStartOffset bytes into the
+/// outermost object: zero the gap [RunningOffset, CurrentStartOffset), then
+/// either recurse into arrays/records/atomics or mark a scalar's value bytes
+/// as visited by advancing RunningOffset past them.
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
+                                 size_t CurrentStartOffset,
+                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
+                                 bool VisitVirtualBase) {
+  // Bytes between the last visited byte and this subobject are padding.
+  for (; RunningOffset < CurrentStartOffset; ++RunningOffset)
+    WriteZeroAtOffset(RunningOffset);
+
+  auto *Type = CGF.ConvertTypeForMem(Ty);
+  // Use the ASTContext helper rather than a bare dyn_cast so that array types
+  // spelled through a typedef/alias are still recognised as arrays.
+  if (auto *AT = CGF.getContext().getAsConstantArrayType(Ty)) {
+    ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
+                              RunningOffset, WriteZeroAtOffset);
+  } else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
+    ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
+                       WriteZeroAtOffset, VisitVirtualBase);
+  } else if (Ty->isAtomicType()) {
+    RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
+                                CurrentStartOffset, RunningOffset,
+                                WriteZeroAtOffset, true);
+  } else {
+    // Scalar: only the value bytes (type size in bits / 8) count as data, so
+    // any remaining storage bytes are later treated as padding.
+    const auto &DL = CGF.CGM.getModule().getDataLayout();
+    auto Size = DL.getTypeSizeInBits(Type).getKnownMinValue() / 8;
+    RunningOffset =
+        std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
+  }
+}
+
+/// Emit IR that zeroes every padding byte of the object of type \p Ty that
+/// \p Ptr points to: interior padding first (found by walking the type), then
+/// any tail padding up to the type's alloc size.
+static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
+                                    QualType Ty) {
+  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+  // Stores a single zero byte at byte offset `Offset` from the object start.
+  // NOTE(review): the index is built with IntTy, so very large offsets would
+  // not fit -- confirm whether SizeTy is wanted here.
+  auto WriteZeroAtOffset = [&](uint64_t Offset) {
+    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+    CGF.Builder.CreateAlignedStore(
+        Zero, Element,
+        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+  };
+
+  // First byte offset not yet known to be either data or cleared padding.
+  size_t RunningOffset = 0;
+
+  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
+                              true);
+
+  // Clear tail padding: everything between the last visited byte and the
+  // alloc size of the converted type.
+  const auto &DL = CGF.CGM.getModule().getDataLayout();
+  auto Size = DL.getTypeAllocSize(CGF.ConvertTypeForMem(Ty)).getKnownMinValue();
+  for (; RunningOffset < Size; ++RunningOffset)
+    WriteZeroAtOffset(RunningOffset);
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -5189,6 +5391,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(Ptr);
   }
+  case Builtin::BI__builtin_clear_padding: {
+    const Expr *Op = E->getArg(0);
+    Value *Address = EmitScalarExpr(Op);
+    auto PointeeTy = Op->getType()->getPointeeType();
+    RecursivelyClearPadding(*this, Address, PointeeTy);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_sub:
   case Builtin::BI__sync_fetch_and_or:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index eb957df6f1e97..e175f06b5304c 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3124,7 +3124,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     return BuiltinIsWithinLifetime(*this, TheCall);
   case Builtin::BI__builtin_trivially_relocate:
     return BuiltinTriviallyRelocate(*this, TheCall);
+  case Builtin::BI__builtin_clear_padding: {
+    const auto numArgs = TheCall->getNumArgs();
+    if (numArgs < 1) {
+      Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
+          << 0 /*function call*/ << "T*" << 0;
+      return ExprError();
+    }
+    if (numArgs > 1) {
+      Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
+          << 0 /*function call*/ << "T*" << numArgs << 0;
+      return ExprError();
+    }
 
+    const Expr *PtrArg = TheCall->getArg(0);
+    const QualType PtrArgType = PtrArg->getType();
+    if (!PtrArgType->isPointerType()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+          << PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
+          << "pointer";
+      return ExprError();
+    }
+    if (PtrArgType->getPointeeType().isConstQualified()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+          << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+      return ExprError();
+    }
+    if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+                            diag::err_typecheck_decl_incomplete_type))
+      return ExprError();
+    break;
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
new file mode 100644
index 0000000000000..54455e6699849
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+struct alignas(4) Foo {
+  char a;
+  alignas(2) char b;
+};
+
+struct alignas(4) Bar {
+  char c;
+  alignas(2) char d;
+};
+
+struct alignas(4) Baz : Foo {
+  char e;
+  Bar f;
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
+// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
+// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
+
+// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
+// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_5]]
+
+// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
+// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
+// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_6]]
+// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_7]]
+// CHECK: ret void
+void testBaz(Baz *baz) {
+  __builtin_clear_padding(baz);
+}
+
+struct UnsizedTail {
+  int size;
+  alignas(8) char buf[];
+
+  UnsizedTail(int size) : size(size) {}
+};
+
+// UnsizedTail structure:
+// "size", PAD_1, PAD_2, PAD_3, PAD_4
+// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
+
+// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
+// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
+// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testUnsizedTail(UnsizedTail *u) {
+  __builtin_clear_padding(u);
+}
+
+struct ArrOfStructsWithPadding {
+  Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
+// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
+// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
+// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
+// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
+  __builtin_clear_padding(arr);
+}
diff --git a/clang/test/SemaCXX/builtin-clear-padding.cpp b/clang/test/SemaCXX/builtin-clear-padding.cpp
new file mode 100644
index 0000000000000..ea87249c87b0a
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-clear-padding.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct Foo {};
+
+struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
+
+void test(int a, Foo b, void *c, int *d, Foo *e, const Foo *f, Incomplete *g) {
+  __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
+  __builtin_clear_padding(b); // expected-error {{passing 'Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('Foo' vs pointer)}}
+  __builtin_clear_padding(c); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(d); // This should not error.
+  __builtin_clear_padding(e); // This should not error.
+  __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
+  __builtin_clear_padding(g); // expected-error {{variable has incomplete type 'Incomplete'}}
+}
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
new file mode 100644
index 0000000000000..d504ac58e43ae
--- /dev/null
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -0,0 +1,807 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: c++03
+
+// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+template <class T>
+void print_bytes(const T* object) {
+  auto size                        = sizeof(T);
+  const unsigned char* const bytes = reinterpret_cast<const unsigned char*>(object);
+  size_t i;
+
+  fprintf(stderr, "[ ");
+  for (i = 0; i < size; i++) {
+    fprintf(stderr, "%02x ", bytes[i]);
+  }
+  fprintf(stderr, "]\n");
+}
+
+template <class T>
+void __builtin_clear_padding2(T t) {
+  __builtin_clear_padding(t);
+}
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) BasicWithPadding {
+  T x;
+  alignas(A2) T y;
+};
+
+template <size_t A1, size_t A2, size_t N, class T>
+struct alignas(A1) SpacedArrayMembers {
+  T x[N];
+  alignas(A2) char c;
+  T y[N];
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) PaddedPointerMembers {
+  T* x;
+  alignas(A2) T* y;
+};
+
+template <size_t A1, size_t A2, size_t A3, class T>
+struct alignas(A1) ThreeMembers {
+  T x;
+  alignas(A2) T y;
+  alignas(A3) T z;
+};
+
+template <class T>
+struct Normal {
+  T a;
+  T b;
+};
+
+template <class T>
+struct X {
+  T x;
+};
+
+template <class T>
+struct Z {
+  T z;
+};
+
+template <size_t A, class T>
+struct YZ : public Z<T> {
+  alignas(A) T y;
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) HasBase : public X<T>, public YZ<A2, T> {
+  T a;
+  alignas(A2) T b;
+};
+
+template <size_t A1, size_t A2, class T>
+void testAllStructsForType(T a, T b, T c, T d) {
+  // basic padding
+  {
+    using B = BasicWithPadding<A1, A2, T>;
+    B basic1;
+    memset(&basic1, 0, sizeof(B));
+    basic1.x = a;
+    basic1.y = b;
+    B basic2;
+    memset(&basic2, 42, sizeof(B));
+    basic2.x = a;
+    basic2.y = b;
+    assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+    __builtin_clear_padding2(&basic2);
+    assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+  }
+
+  // spaced array
+  {
+    using A = SpacedArrayMembers<A1, A2, 2, T>;
+    A arr1;
+    memset(&arr1, 0, sizeof(A));
+    arr1.x[0] = a;
+    arr1.x[1] = b;
+    arr1.y[0] = c;
+    arr1.y[1] = d;
+    A arr2;
+    memset(&arr2, 42, sizeof(A));
+    arr2.x[0] = a;
+    arr2.x[1] = b;
+    arr2.y[0] = c;
+    arr2.y[1] = d;
+    arr2.c    = 0;
+    assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+    __builtin_clear_padding2(&arr2);
+    assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+  }
+
+  // pointer members
+  {
+    using P = PaddedPointerMembers<A1, A2, T>;
+    P ptr1;
+    memset(&ptr1, 0, sizeof(P));
+    ptr1.x = &a;
+    ptr1.y = &b;
+    P ptr2;
+    memset(&ptr2, 42, sizeof(P));
+    ptr2.x = &a;
+    ptr2.y = &b;
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+    __builtin_clear_padding2(&ptr2);
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+  }
+
+  // three members
+  {
+    using Three = ThreeMembers<A1, A2, A2, T>;
+    Three three1;
+    memset(&three1, 0, sizeof(Three));
+    three1.x = a;
+    three1.y = b;
+    three1.z = c;
+    Three three2;
+    memset(&three2, 42, sizeof(Three));
+    three2.x = a;
+    three2.y = b;
+    three2.z = c;
+    __builtin_clear_padding2(&three2);
+    assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+  }
+
+  // Normal struct no padding
+  {
+    using N = Normal<T>;
+    N normal1;
+    memset(&normal1, 0, sizeof(N));
+    normal1.a = a;
+    normal1.b = b;
+    N normal2;
+    memset(&normal2, 42, sizeof(N));
+    normal2.a = a;
+    normal2.b = b;
+    __builtin_clear_padding2(&normal2);
+    assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+  }
+
+  // base class
+  {
+    using H = HasBase<A1, A2, T>;
+    H base1;
+    memset(&base1, 0, sizeof(H));
+    base1.a = a;
+    base1.b = b;
+    base1.x = c;
+    base1.y = d;
+    base1.z = a;
+    H base2;
+    memset(&base2, 42, sizeof(H));
+    base2.a = a;
+    base2.b = b;
+    base2.x = c;
+    base2.y = d;
+    base2.z = a;
+    assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+    __builtin_clear_padding2(&base2);
+    assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+  }
+}
+
+struct UnsizedTail {
+  int size;
+  alignas(8) char buf[];
+
+  UnsizedTail(int size) : size(size) {}
+};
+
+void otherStructTests() {
+  // Unsized Tail
+  {
+    const size_t size1 = sizeof(UnsizedTail) + 4;
+    char buff1[size1];
+    char buff2[size1];
+    memset(buff1, 0, size1);
+    memset(buff2, 42, size1);
+    auto* u1   = new (buff1) UnsizedTail(4);
+    u1->buf[0] = 1;
+    u1->buf[1] = 2;
+    u1->buf[2] = 3;
+    u1->buf[3] = 4;
+    auto* u2   = new (buff2) UnsizedTail(4);
+    u2->buf[0] = 1;
+    u2->buf[1] = 2;
+    u2->buf[2] = 3;
+    u2->buf[3] = 4;
+    assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+    __builtin_clear_padding2(u2);
+
+    assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+  }
+
+  // basic padding on the heap
+  {
+    using B      = BasicWithPadding<8, 4, char>;
+    auto* basic1 = new B;
+    memset(basic1, 0, sizeof(B));
+    basic1->x    = 1;
+    basic1->y    = 2;
+    auto* basic2 = new B;
+    memset(basic2, 42, sizeof(B));
+    basic2->x = 1;
+    basic2->y = 2;
+    assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+    __builtin_clear_padding2(basic2);
+    assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+    delete basic2;
+    delete basic1;
+  }
+
+  // basic padding volatile on the heap
+  {
+    using B   = BasicWithPadding<8, 4, char>;
+    B* basic3 = new B;
+    memset(basic3, 0, sizeof(B));
+    basic3->x = 1;
+    basic3->y = 2;
+    B* basic4 = new B;
+    memset(basic4, 42, sizeof(B));
+    basic4->x = 1;
+    basic4->y = 2;
+    assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+    __builtin_clear_padding2(const_cast<volatile B*>(basic4));
+    __builtin_clear_padding2(basic4);
+    assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+    delete basic4;
+    delete basic3;
+  }
+}
+
+struct Foo {
+  int x;
+  int y;
+};
+
+typedef float Float4Vec __attribute__((ext_vector_type(4)));
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+void primitiveTests() {
+  // no padding
+  {
+    int i1 = 42, i2 = 42;
+    __builtin_clear_padding2(&i1); // does nothing
+    assert(i1 == 42);
+    assert(memcmp(&i1, &i2, sizeof(int)) == 0);
+  }
+
+  // long double
+  {
+    long double d1, d2;
+    memset(&d1, 42, sizeof(long double));
+    memset(&d2, 0, sizeof(long double));
+
+    d1 = 3.0L;
+    d2 = 3.0L;
+
+    __builtin_clear_padding2(&d1);
+    assert(d1 == 3.0L);
+    assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
+  }
+}
+
+void structTests() {
+  // no_unique_address
+  {
+    struct S1 {
+      int x;
+      char c;
+    };
+
+    struct S2 {
+      [[no_unique_address]] S1 s;
+      bool b;
+    };
+
+    S2 s1, s2;
+    memset(&s1, 42, sizeof(S2));
+    memset(&s2, 0, sizeof(S2));
+
+    s1.s.x = 4;
+    s1.s.c = 'a';
+    s1.b   = true;
+    s2.s.x = 4;
+    s2.s.c = 'a';
+    s2.b   = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S2)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.s.x == 4);
+    assert(s1.s.c == 'a');
+    assert(s1.b == true);
+
+    assert(memcmp(&s1, &s2, sizeof(S2)) == 0);
+  }
+
+  // struct with long double
+  {
+    struct S {
+      long double l;
+      bool b;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.l = 3.0L;
+    s1.b = true;
+    s2.l = 3.0L;
+    s2.b = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.l == 3.0L);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // EBO
+  {
+    struct Empty {};
+    struct B {
+      int i;
+    };
+    struct S : Empty, B {
+      bool b;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.i = 4;
+    s1.b = true;
+    s2.i = 4;
+    s2.b = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.i == 4);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // padding between bases
+  {
+    struct B1 {
+      char c1;
+    };
+    struct B2 {
+      alignas(4) char c2;
+    };
+
+    struct S : B1, B2 {};
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.c1 = 'a';
+    s1.c2 = 'b';
+    s2.c1 = 'a';
+    s2.c2 = 'b';
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // padding after last base
+  {
+    struct B1 {
+      char c1;
+    };
+    struct B2 {
+      char c2;
+    };
+
+    struct S : B1, B2 {
+      alignas(4) char c3;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.c1 = 'a';
+    s1.c2 = 'b';
+    s1.c3 = 'c';
+    s2.c1 = 'a';
+    s2.c2 = 'b';
+    s2.c3 = 'c';
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(s1.c3 == 'c');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // vtable
+  {
+    struct VirtualBase {
+      unsigned int x;
+      virtual int call() { return x; };
+      virtual ~VirtualBase() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : VirtualBase, NonVirtualBase {
+      virtual int call() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x = 0xFFFFFFFF;
+    s2->x = 0xFFFFFFFF;
+    s1->y = 'a';
+    s2->y = 'a';
+    s1->z = true;
+    s2->z = true;
+    __builtin_clear_padding2(s2);
+    assert(s2->x == 0xFFFFFFFF);
+    assert(s2->y == 'a');
+    assert(s2->z == true);
+    assert(s2->call() == 5);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // multiple bases with vtable
+  {
+    struct VirtualBase1 {
+      unsigned int x1;
+      virtual int call1() { return x1; };
+      virtual ~VirtualBase1() = default;
+    };
+
+    struct VirtualBase2 {
+      unsigned int x2;
+      virtual int call2() { return x2; };
+      virtual ~VirtualBase2() = default;
+    };
+
+    struct VirtualBase3 {
+      unsigned int x3;
+      virtual int call3() { return x3; };
+      virtual ~VirtualBase3() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : VirtualBase1, VirtualBase2, NonVirtualBase, VirtualBase3 {
+      virtual int call1() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x1 = 0xFFFFFFFF;
+    s2->x1 = 0xFFFFFFFF;
+    s1->x2 = 0xFAFAFAFA;
+    s2->x2 = 0xFAFAFAFA;
+    s1->x3 = 0xAAAAAAAA;
+    s2->x3 = 0xAAAAAAAA;
+    s1->y  = 'a';
+    s2->y  = 'a';
+    s1->z  = true;
+    s2->z  = true;
+    __builtin_clear_padding2(s2);
+    assert(s2->x1 == 0xFFFFFFFF);
+    assert(s2->x2 == 0xFAFAFAFA);
+    assert(s2->x3 == 0xAAAAAAAA);
+    assert(s2->y == 'a');
+    assert(s2->z == true);
+    assert(s2->call1() == 5);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // chain of bases with virtual functions
+  {
+    struct VirtualBase1 {
+      unsigned int x1;
+      virtual int call1() { return x1; };
+      virtual ~VirtualBase1() = default;
+    };
+
+    struct VirtualBase2 : VirtualBase1 {
+      unsigned int x2;
+      virtual int call2() { return x2; };
+      virtual ~VirtualBase2() = default;
+    };
+
+    struct VirtualBase3 : VirtualBase2 {
+      unsigned int x3;
+      virtual int call3() { return x3; };
+      virtual ~VirtualBase3() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : NonVirtualBase, VirtualBase3 {
+      //virtual int call() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x1 = 0xFFFFFFFF;
+    s2->x1 = 0xFFFFFFFF;
+    s1->x2 = 0xFAFAFAFA;
+    s2->x2 = 0xFAFAFAFA;
+    s1->x3 = 0xAAAAAAAA;
+    s2->x3 = 0xAAAAAAAA;
+    s1->y  = 'a';
+    s2->y  = 'a';
+    s1->z  = true;
+    s2->z  = true;
+    __builtin_clear_padding2(s2);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // virtual inheritance
+  {
+    struct Base {
+      int x;
+    };
+    struct D1 : virtual Base {
+      int d1;
+      bool b1;
+    };
+    struct D2 : virtual Base {
+      int d2;
+      bool b2;
+    };
+
+    struct S : D1, D2 {
+      bool s;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+    S* s1 = new (&buff1) S;
+    S* s2 = new (&buff2) S;
+
+    s1->x  = 0xFFFFFFFF;
+    s2->x  = 0xFFFFFFFF;
+    s1->d1 = 0xFAFAFAFA;
+    s2->d1 = 0xFAFAFAFA;
+    s1->d2 = 0xAAAAAAAA;
+    s2->d2 = 0xAAAAAAAA;
+    s1->b1 = true;
+    s2->b1 = true;
+    s1->b2 = true;
+    s2->b2 = true;
+    s1->s  = true;
+    s2->s  = true;
+    __builtin_clear_padding2(s2);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
+  }
+
+  // bit fields
+  {
+    struct S {
+      // will usually occupy 2 bytes:
+      unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
+      unsigned char    : 2; // next 2 bits (in 1st byte) are blocked out as unused
+      unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd
+      unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte
+    };
+
+    S s1, s2;
+    memset(&s1, 0, sizeof(S));
+    memset(&s2, 42, sizeof(S));
+
+    s1.b1 = 5;
+    s2.b1 = 5;
+    s1.b2 = 27;
+    s2.b2 = 27;
+    s1.b3 = 3;
+    s2.b3 = 3;
+    __builtin_clear_padding(&s2);
+    print_bytes(&s1);
+    print_bytes(&s2);
+    //TODO
+    //assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  testAllStructsForType<32, 16, char>(11, 22, 33, 44);
+  testAllStructsForType<64, 32, char>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, volatile char>(11, 22, 33, 44);
+  testAllStructsForType<64, 32, volatile char>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, int>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, int>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, volatile int>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, volatile int>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, double>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, double>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(28)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(28)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(60)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(60)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(64)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(64)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllStructsForType<64, 32, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllStructsForType<256, 128, Float3Vec>(0, 1, 2, 3);
+  testAllStructsForType<128, 128, Float3Vec>(4, 5, 6, 7);
+  testAllStructsForType<256, 128, Float4Vec>(0, 1, 2, 3);
+  testAllStructsForType<128, 128, Float4Vec>(4, 5, 6, 7);
+
+  otherStructTests();
+}
+
+void unionTests() { // checks which bytes of a union the builtin may zero
+  // different sizes: storage of the larger, inactive member must not be cleared
+  {
+    union u {
+      int i;
+      char c;
+    };
+
+    u u1, u2;
+    memset(&u1, 42, sizeof(u));
+    memset(&u2, 42, sizeof(u));
+    u1.c = '4';
+    u2.c = '4';
+
+    __builtin_clear_padding2(&u1); // expected to be a no-op for this union
+    assert(u1.c == '4');
+
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+  }
+
+  // tail padding of the largest member (s, widened by alignas(8)) must be zeroed
+  {
+    struct s {
+      alignas(8) char c1;
+    };
+
+    union u {
+      s s1;
+      char c2;
+    };
+
+    u u1, u2;
+    memset(&u1, 42, sizeof(u));
+    memset(&u2, 0, sizeof(u));
+
+    u1.s1.c1 = '4';
+    u2.s1.c1 = '4';
+
+    assert(memcmp(&u1, &u2, sizeof(u)) != 0);
+    __builtin_clear_padding2(&u1);
+    assert(u1.s1.c1 == '4');
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+  }
+}
+
+// Exercises __builtin_clear_padding on arrays of scalars and of padded structs.
+void arrayTests() {
+  // int array: no padding, values and bytes must be unchanged
+  {
+    int i1[2] = {1, 2};
+    int i2[2] = {1, 2};
+
+    __builtin_clear_padding2(&i1);
+    assert(i1[0] == 1);
+    assert(i1[1] == 2);
+    assert(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
+  }
+
+  // long double array: values survive and d1 ends up byte-equal to zero-filled d2
+  {
+    long double d1[2], d2[2];
+    memset(&d1, 42, 2 * sizeof(long double));
+    memset(&d2, 0, 2 * sizeof(long double));
+
+    d1[0] = 3.0L;
+    d1[1] = 4.0L;
+    d2[0] = 3.0L;
+    d2[1] = 4.0L;
+
+    __builtin_clear_padding2(&d1);
+    assert(d1[0] == 3.0L);
+    assert(d1[1] == 4.0L);
+    assert(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
+  }
+
+  // array of structs: the padding inside every element must be zeroed
+  {
+    struct S {
+      int i1;
+      char c1;
+      int i2;
+      char c2;
+    };
+
+    S s1[2], s2[2];
+    memset(&s1, 42, 2 * sizeof(S));
+    memset(&s2, 0, 2 * sizeof(S));
+
+    s1[0].i1 = 1;
+    s1[0].c1 = 'a';
+    s1[0].i2 = 2;
+    s1[0].c2 = 'b';
+    s1[1].i1 = 3;
+    s1[1].c1 = 'c';
+    s1[1].i2 = 4;
+    s1[1].c2 = 'd';
+
+    s2[0].i1 = 1;
+    s2[0].c1 = 'a';
+    s2[0].i2 = 2;
+    s2[0].c2 = 'b';
+    s2[1].i1 = 3;
+    s2[1].c1 = 'c';
+    s2[1].i2 = 4;
+    s2[1].c2 = 'd';
+
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
+    __builtin_clear_padding2(&s1);
+    assert(s1[0].i1 == 1);
+    assert(s1[0].c1 == 'a');
+    assert(s1[0].i2 == 2);
+    assert(s1[0].c2 == 'b');
+    assert(s1[1].i1 == 3);
+    assert(s1[1].c1 == 'c');
+    assert(s1[1].i2 == 4);
+    assert(s1[1].c2 == 'd');
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
+  }
+}
+
+int main(int, const char**) { // entry point: runs each test group in turn
+  primitiveTests();
+  unionTests();
+  structTests();
+  arrayTests();
+
+  return 0;
+}

>From d9beac0d6aa43072568c4639311cc29642c744a9 Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Sun, 16 Jun 2024 00:05:25 +0100
Subject: [PATCH 02/22] new approach

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 411 ++++++++++--------
 .../atomics/builtin_clear_padding.pass.cpp    | 181 ++++----
 2 files changed, 333 insertions(+), 259 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 492d4a5674254..428c30bef94e1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -40,6 +40,9 @@
 #include <algorithm>
 #include <optional>
 #include <utility>
+#include <deque>
+#include <vector>
+#include <sstream>
 
 using namespace clang;
 using namespace CodeGen;
@@ -2701,204 +2704,254 @@ RValue CodeGenFunction::emitStdcFirstBit(const CallExpr *E, Intrinsic::ID IntID,
   return RValue::get(Result);
 }
 
-template <class T>
-void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
-                                 size_t CurrentStartOffset,
-                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
-                                 bool VisitVirtualBase);
-
-template <class T>
-void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
-                        StructType *ST, size_t CurrentStartOffset,
-                        size_t &RunningOffset, T &&WriteZeroAtOffset,
-                        bool VisitVirtualBase) {
-  llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
-  const auto &DL = CGF.CGM.getModule().getDataLayout();
-  auto *SL = DL.getStructLayout(ST);
-  auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
-  if (!R) {
-    llvm::dbgs() << "Not a CXXRecordDecl\n";
-    return;
-  }
-  const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
-  if (ASTLayout.hasOwnVFPtr()) {
-    llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
-                 << RunningOffset << " to "
-                 << RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
-    RunningOffset += DL.getPointerSizeInBits() / 8;
-  }
-  std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
-  Bases.reserve(R->getNumBases());
-  // todo get vbases
-  for (auto Base : R->bases()) {
-    auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
-    if (!Base.isVirtual()) {
-      auto Offset = static_cast<size_t>(
-          ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
-      Bases.emplace_back(Offset, Base);
+namespace {
+
+struct PaddingClearer {
+  PaddingClearer(CodeGenFunction &F)
+      : CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}
+
+  void run(Value *Ptr, QualType Ty) { // emits zero stores over Ty's padding at Ptr
+    OccuppiedIntervals.clear();
+    Queue.clear();
+
+    Queue.push_back(Data{0, Ty, true}); // root object: bit offset 0, visit vbases
+    while (!Queue.empty()) { // FIFO walk; Visit() may append further entries
+      auto Current = Queue.front();
+      Queue.pop_front();
+      Visit(Current);
+    }
+
+    MergeOccuppiedIntervals(); // sort and fuse before the gaps are computed
+    auto PaddingIntervals =
+        GetPaddingIntervals(CGF.getContext().getTypeSize(Ty));
+    llvm::dbgs() << "Occuppied Bits:\n";
+    for (auto [first, last] : OccuppiedIntervals) {
+      llvm::dbgs() << "[" << first << ", " << last << ")\n";
+    }
+    llvm::dbgs() << "Padding Bits:\n";
+    for (auto [first, last] : PaddingIntervals) {
+      llvm::dbgs() << "[" << first << ", " << last << ")\n";
+    }
+
+    for (const auto &Interval : PaddingIntervals) {
+      ClearPadding(Ptr, Interval);
     }
   }
 
-  auto VisitBases =
-      [&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
-        std::sort(
-            BasesToVisit.begin(), BasesToVisit.end(),
-            [](const auto &P1, const auto &P2) { return P1.first < P2.first; });
-        for (const auto &Pair : BasesToVisit) {
-          // is it OK to use structured binding in clang? what is the language
-          // version?
-          auto Offset = Pair.first;
-          auto Base = Pair.second;
-
-          llvm::dbgs() << "visiting base at offset " << Offset << '\n';
-          // Recursively zero out base classes.
-          auto Index = SL->getElementContainingOffset(Offset);
-          Value *Idx = CGF.Builder.getSize(Index);
-          llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
-          Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
-          RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
-                                      CurrentStartOffset + Offset,
-                                      RunningOffset, WriteZeroAtOffset, false);
-        }
-      };
+private:
+  struct BitInterval {
+    // Half-open range [First, Last), measured in bits.
+    uint64_t First;
+    uint64_t Last;
+  };
 
-  VisitBases(Bases);
-
-  size_t NumFields = std::distance(R->field_begin(), R->field_end());
-  std::vector<size_t> FieldOffsets;
-  FieldOffsets.reserve(NumFields);
-  auto CurrentField = R->field_begin();
-  for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
-    // Size needs to be in bytes so we can compare it later.
-    auto Offset = ASTLayout.getFieldOffset(I) / 8;
-    llvm::dbgs() << "visiting field at offset " << Offset << '\n';
-    auto Index = SL->getElementContainingOffset(Offset);
-    Value *Idx = CGF.Builder.getSize(Index);
-    llvm::Type *CurrentFieldType =
-        CGF.ConvertTypeForMem(CurrentField->getType());
-    Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
-    RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
-                                CurrentStartOffset + Offset, RunningOffset,
-                                WriteZeroAtOffset, true);
-  }
-
-  if (VisitVirtualBase) {
-
-    std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
-    VBases.reserve(R->getNumVBases());
-    for (auto VBase : R->vbases()) {
-      auto *BaseRecord =
-          cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
-      auto Offset = static_cast<size_t>(
-          ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
-      VBases.emplace_back(Offset, VBase);
+  struct Data { // one pending work item in Queue
+    uint64_t StartBitOffset; // where this subobject starts, in bits
+    QualType Ty;
+    bool VisitVirtualBase; // false for entries queued as a base class
+  };
+
+  // Dispatches one queued subobject: arrays, records, atomics and complex types
+  // are handed on; any other type is recorded as one fully occupied interval.
+  void Visit(Data const &D) {
+    // getAsConstantArrayType() sees through typedefs and qualifiers.
+    if (auto *AT = CGF.getContext().getAsConstantArrayType(D.Ty)) {
+      VisitArray(AT, D.StartBitOffset);
+      return;
+    }
+    if (auto *Record = D.Ty->getAsCXXRecordDecl()) {
+      VisitStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
+      return;
+    }
+    if (D.Ty->isAtomicType()) {
+      auto Unwrapped = D;
+      Unwrapped.Ty = D.Ty.getAtomicUnqualifiedType();
+      Queue.push_back(Unwrapped);
+      return;
+    }
+    if (const auto *Complex = D.Ty->getAs<ComplexType>()) {
+      VisitComplex(Complex, D.StartBitOffset);
+      return;
     }
 
-    VisitBases(VBases);
+    auto *Type = CGF.ConvertTypeForMem(D.Ty);
+    auto SizeBit = CGF.CGM.getModule()
+                       .getDataLayout()
+                       .getTypeSizeInBits(Type)
+                       .getKnownMinValue();
+    llvm::dbgs() << "clear_padding primitive type. adding Interval ["
+                 << D.StartBitOffset << ", " << D.StartBitOffset + SizeBit
+                 << ")\n";
+    OccuppiedIntervals.push_back(
+        BitInterval{D.StartBitOffset, D.StartBitOffset + SizeBit});
+  }
+
+  // Queues each element of AT. Consecutive elements are one stride apart, the
+  // stride being the element size rounded up to the element alignment.
+  void VisitArray(const ConstantArrayType *AT, uint64_t StartBitOffset) {
+    llvm::dbgs() << "clear_padding visiting constant array starting from "
+                 << StartBitOffset << "\n";
+    // The element layout and the bound do not change per iteration: hoist them.
+    QualType ElementQualType = AT->getElementType();
+    auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
+    auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
+    const uint64_t StrideBits =
+        ElementSize.alignTo(ElementAlign).getQuantity() * CharWidth;
+    const uint64_t NumElements = AT->getSize().getLimitedValue();
+    for (uint64_t ArrIndex = 0; ArrIndex < NumElements; ++ArrIndex)
+      Queue.push_back(
+          Data{StartBitOffset + ArrIndex * StrideBits, ElementQualType, true});
   }
-}
 
-template <class T>
-void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
-                               llvm::Type *Type, ConstantArrayType const *AT,
-                               size_t CurrentStartOffset, size_t &RunningOffset,
-                               T &&WriteZeroAtOffset) {
-  llvm::dbgs() << "clear padding constant array\n";
-  for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
-       ++ArrIndex) {
+  // Marks R's own vptr and bit-fields occupied; queues bases and other fields.
+  void VisitStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
+                   bool VisitVirtualBase) {
+    llvm::dbgs() << "clear_padding visiting struct: "
+                 << R->getQualifiedNameAsString() << " starting from offset "
+                 << StartBitOffset << '\n';
+    const auto &DL = CGF.CGM.getModule().getDataLayout();
+    const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+    if (ASTLayout.hasOwnVFPtr()) {
+      llvm::dbgs()
+          << "clear_padding found vtable ptr. Adding occuppied interval ["
+          << StartBitOffset << ", "
+          << (StartBitOffset + DL.getPointerSizeInBits()) << ")\n";
+      OccuppiedIntervals.push_back(BitInterval{
+          StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
+    }
 
-    QualType ElementQualType = AT->getElementType();
+    const auto VisitBase = [&ASTLayout, StartBitOffset, this](
+                               const CXXBaseSpecifier &Base, auto GetOffset) {
+      auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
+      if (!BaseRecord) {
+        llvm::dbgs() << "Base is not a CXXRecord!\n";
+        return;
+      }
+      auto BaseOffset =
+          std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
 
-    auto *ElementRecord = ElementQualType->getAsRecordDecl();
-    if (!ElementRecord) {
-      llvm::dbgs() << "null!\n";
+      llvm::dbgs() << "visiting base at offset " << StartBitOffset << " + "
+                   << BaseOffset * CharWidth << '\n';
+      Queue.push_back(
+          Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), false});
+    };
+
+    for (auto Base : R->bases()) {
+      if (!Base.isVirtual()) {
+        VisitBase(Base, &ASTRecordLayout::getBaseClassOffset);
+      }
+    }
+
+    if (VisitVirtualBase) {
+      for (auto VBase : R->vbases()) {
+        VisitBase(VBase, &ASTRecordLayout::getVBaseClassOffset);
+      }
+    }
+
+    for (auto *Field : R->fields()) {
+      auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
+      llvm::dbgs() << "visiting field at offset " << StartBitOffset << " + "
+                   << FieldOffset << '\n';
+      if (Field->isBitField()) {
+        // NOTE(review): unnamed bit-fields are recorded as occupied too; confirm
+        // whether they should be treated as padding instead.
+        uint64_t First = StartBitOffset + FieldOffset;
+        uint64_t Last = First + Field->getBitWidthValue(CGF.getContext());
+        // Log the same absolute [First, Last) that is recorded below.
+        llvm::dbgs() << "clear_padding found bit field. Adding Interval ["
+                     << First << " , " << Last << ")\n";
+        OccuppiedIntervals.push_back(BitInterval{First, Last});
+      } else {
+        Queue.push_back(
+            Data{StartBitOffset + FieldOffset, Field->getType(), true});
+      }
     }
-    auto ElementAlign =
-        ElementRecord
-            ? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
-            : CGF.getContext().getTypeAlignInChars(ElementQualType);
-
-    Address FieldElementAddr{Ptr, Type, ElementAlign};
-
-    auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
-    auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
-    auto AllocSize =
-        CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
-    llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
-    RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
-                                CurrentStartOffset +
-                                    ArrIndex * AllocSize.getKnownMinValue(),
-                                RunningOffset, WriteZeroAtOffset, true);
   }
-}
 
-template <class T>
-void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
-                                 size_t CurrentStartOffset,
-                                 size_t &RunningOffset, T &&WriteZeroAtOffset,
-                                 bool VisitVirtualBase) {
-
-  llvm::dbgs() << "clear padding before current  [" << RunningOffset << ", "
-               << CurrentStartOffset << ")\n";
-  for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
-    WriteZeroAtOffset(RunningOffset);
-  }
-  auto *Type = CGF.ConvertTypeForMem(Ty);
-  auto Size = CGF.CGM.getModule()
-                  .getDataLayout()
-                  .getTypeSizeInBits(Type)
-                  .getKnownMinValue() /
-              8;
-
-  if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
-    ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
-                              RunningOffset, WriteZeroAtOffset);
-  } else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
-    ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
-                       WriteZeroAtOffset, VisitVirtualBase);
-  } else if (Ty->isAtomicType()) {
-    RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
-                                CurrentStartOffset, RunningOffset,
-                                WriteZeroAtOffset, true);
-  } else {
-    llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
-                 << RunningOffset + Size << '\n';
-    RunningOffset =
-        std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
+  // Queues the real and imaginary parts of a _Complex value; the imaginary part
+  // starts one alignment-rounded element size after the real part.
+  void VisitComplex(const ComplexType *CT, uint64_t StartBitOffset) {
+    QualType ElementQualType = CT->getElementType();
+    auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
+    auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
+    auto ImgOffset = ElementSize.alignTo(ElementAlign);
+    uint64_t ImgStart = StartBitOffset + ImgOffset.getQuantity() * CharWidth;
+    // Keep a separator before "Img" so the two offsets stay readable in the log.
+    llvm::dbgs() << "clear_padding visiting Complex Type. Real from "
+                 << StartBitOffset << " Img from " << ImgStart << "\n";
+    Queue.push_back(Data{StartBitOffset, ElementQualType, true});
+    Queue.push_back(Data{ImgStart, ElementQualType, true});
   }
-}
 
-static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
-                                    QualType Ty) {
-  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
-  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
-  auto WriteZeroAtOffset = [&](uint64_t Offset) {
-    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
-    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-    CGF.Builder.CreateAlignedStore(
-        Zero, Element,
-        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
-  };
+  void MergeOccuppiedIntervals() { // sort, then fuse overlapping/touching ones
+    std::sort(OccuppiedIntervals.begin(), OccuppiedIntervals.end(),
+              [](const BitInterval &lhs, const BitInterval &rhs) {
+                return std::tie(lhs.First, lhs.Last) <
+                       std::tie(rhs.First, rhs.Last);
+              });
 
-  size_t RunningOffset = 0;
+    std::vector<BitInterval> Merged;
+    Merged.reserve(OccuppiedIntervals.size());
 
-  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
-                              true);
+    for (const BitInterval &NextInterval : OccuppiedIntervals) {
+      if (Merged.empty()) {
+        Merged.push_back(NextInterval);
+        continue;
+      }
+      auto &LastInterval = Merged.back();
 
-  // Clear tail padding
-  auto *Type = CGF.ConvertTypeForMem(Ty);
+      if (NextInterval.First > LastInterval.Last) {
+        Merged.push_back(NextInterval);
+      } else {
+        LastInterval.Last = std::max(LastInterval.Last, NextInterval.Last);
+      }
+    }
 
-  auto Size = CGF.CGM.getModule()
-                  .getDataLayout()
-                  .getTypeAllocSize(Type)
-                  .getKnownMinValue();
+    OccuppiedIntervals = std::move(Merged); // Merged is dead here: move, not copy
+  }
 
-  llvm::dbgs() << "clear tail padding  [" << RunningOffset << ", " << Size
-               << ")\n";
-  for (; RunningOffset < Size; ++RunningOffset) {
-    WriteZeroAtOffset(RunningOffset);
+  // Returns the gaps that OccuppiedIntervals leaves inside [0, SizeInBits), in
+  // ascending order. Expects the intervals sorted and disjoint; run() calls
+  // MergeOccuppiedIntervals() first. A fully occupied object needs no special
+  // case: the loop finds no gap and the result is empty. Never dereference
+  // OccuppiedIntervals.end() here; it is a past-the-end iterator.
+  std::vector<BitInterval> GetPaddingIntervals(uint64_t SizeInBits) const {
+    std::vector<BitInterval> Results;
+    Results.reserve(OccuppiedIntervals.size() + 1);
+    uint64_t CurrentPos = 0;
+    for (const BitInterval &OccupiedInterval : OccuppiedIntervals) {
+      if (OccupiedInterval.First > CurrentPos) {
+        Results.push_back(BitInterval{CurrentPos, OccupiedInterval.First});
+      }
+      CurrentPos = OccupiedInterval.Last;
+    }
+    if (SizeInBits > CurrentPos) {
+      Results.push_back(BitInterval{CurrentPos, SizeInBits});
+    }
+    return Results;
+  }
+
+  // Zeroes every byte lying wholly inside PaddingInteval; the start rounds up
+  // and the end rounds down so bytes shared with live bits stay untouched.
+  void ClearPadding(Value *Ptr, const BitInterval &PaddingInteval) {
+    // TODO: also clear padding bits inside partially occupied bytes.
+    auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+    auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+    for (auto Offset = (PaddingInteval.First + CharWidth - 1) / CharWidth;
+         Offset < PaddingInteval.Last / CharWidth; ++Offset) {
+      auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+      auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+      CGF.Builder.CreateAlignedStore(Zero, Element, CharUnits::One());
+    }
   }
-}
+
+  CodeGenFunction &CGF;
+  const uint64_t CharWidth;
+  std::deque<Data> Queue;
+  std::vector<BitInterval> OccuppiedIntervals;
+};
+
+} // namespace
 
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
@@ -5395,7 +5448,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     const Expr *Op = E->getArg(0);
     Value *Address = EmitScalarExpr(Op);
     auto PointeeTy = Op->getType()->getPointeeType();
-    RecursivelyClearPadding(*this, Address, PointeeTy);
+    PaddingClearer clearer{*this};
+    clearer.run(Address, PointeeTy);
+    //RecursivelyClearPadding(*this, Address, PointeeTy);
     return RValue::get(nullptr);
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index d504ac58e43ae..49c57b1473447 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -30,6 +30,12 @@ void print_bytes(const T* object) {
 template <class T>
 void __builtin_clear_padding2(T t) {
   __builtin_clear_padding(t);
+  (void)t;
+}
+
+void assert2(bool b){ // function wrapper around the assert macro
+  assert(b);
+  (void)b; // b is otherwise unused when assert() expands to nothing
 }
 
 template <size_t A1, size_t A2, class T>
@@ -98,9 +104,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&basic2, 42, sizeof(B));
     basic2.x = a;
     basic2.y = b;
-    assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+    assert2(memcmp(&basic1, &basic2, sizeof(B)) != 0);
     __builtin_clear_padding2(&basic2);
-    assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+    assert2(memcmp(&basic1, &basic2, sizeof(B)) == 0);
   }
 
   // spaced array
@@ -119,9 +125,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     arr2.y[0] = c;
     arr2.y[1] = d;
     arr2.c    = 0;
-    assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+    assert2(memcmp(&arr1, &arr2, sizeof(A)) != 0);
     __builtin_clear_padding2(&arr2);
-    assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+    assert2(memcmp(&arr1, &arr2, sizeof(A)) == 0);
   }
 
   // pointer members
@@ -135,9 +141,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&ptr2, 42, sizeof(P));
     ptr2.x = &a;
     ptr2.y = &b;
-    assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+    assert2(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
     __builtin_clear_padding2(&ptr2);
-    assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+    assert2(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
   }
 
   // three members
@@ -154,7 +160,7 @@ void testAllStructsForType(T a, T b, T c, T d) {
     three2.y = b;
     three2.z = c;
     __builtin_clear_padding2(&three2);
-    assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+    assert2(memcmp(&three1, &three2, sizeof(Three)) == 0);
   }
 
   // Normal struct no padding
@@ -169,7 +175,7 @@ void testAllStructsForType(T a, T b, T c, T d) {
     normal2.a = a;
     normal2.b = b;
     __builtin_clear_padding2(&normal2);
-    assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+    assert2(memcmp(&normal1, &normal2, sizeof(N)) == 0);
   }
 
   // base class
@@ -189,9 +195,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     base2.x = c;
     base2.y = d;
     base2.z = a;
-    assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+    assert2(memcmp(&base1, &base2, sizeof(H)) != 0);
     __builtin_clear_padding2(&base2);
-    assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+    assert2(memcmp(&base1, &base2, sizeof(H)) == 0);
   }
 }
 
@@ -220,10 +226,10 @@ void otherStructTests() {
     u2->buf[1] = 2;
     u2->buf[2] = 3;
     u2->buf[3] = 4;
-    assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+    assert2(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
     __builtin_clear_padding2(u2);
 
-    assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+    assert2(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
   }
 
   // basic padding on the heap
@@ -237,9 +243,9 @@ void otherStructTests() {
     memset(basic2, 42, sizeof(B));
     basic2->x = 1;
     basic2->y = 2;
-    assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+    assert2(memcmp(basic1, basic2, sizeof(B)) != 0);
     __builtin_clear_padding2(basic2);
-    assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+    assert2(memcmp(basic1, basic2, sizeof(B)) == 0);
     delete basic2;
     delete basic1;
   }
@@ -255,10 +261,10 @@ void otherStructTests() {
     memset(basic4, 42, sizeof(B));
     basic4->x = 1;
     basic4->y = 2;
-    assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+    assert2(memcmp(basic3, basic4, sizeof(B)) != 0);
     __builtin_clear_padding2(const_cast<volatile B*>(basic4));
     __builtin_clear_padding2(basic4);
-    assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+    assert2(memcmp(basic3, basic4, sizeof(B)) == 0);
     delete basic4;
     delete basic3;
   }
@@ -277,8 +283,8 @@ void primitiveTests() {
   {
     int i1 = 42, i2 = 42;
     __builtin_clear_padding2(&i1); // does nothing
-    assert(i1 == 42);
-    assert(memcmp(&i1, &i2, sizeof(int)) == 0);
+    assert2(i1 == 42);
+    assert2(memcmp(&i1, &i2, sizeof(int)) == 0);
   }
 
   // long double
@@ -291,8 +297,20 @@ void primitiveTests() {
     d2 = 3.0L;
 
     __builtin_clear_padding2(&d1);
-    assert(d1 == 3.0L);
-    assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
+    assert2(d1 == 3.0L);
+    assert2(memcmp(&d1, &d2, sizeof(long double)) == 0);
+  }
+
+  // _Complex: c2 is the zero-filled reference holding the same value as c1
+  {
+    _Complex long double c1, c2;
+
+    memset(&c1, 42, sizeof(_Complex long double));
+    memset(&c2, 0, sizeof(_Complex long double));
+    c1 = 3.0L;
+    c2 = 3.0L;
+    __builtin_clear_padding2(&c1);
+    // TODO: presumably assert memcmp(&c1, &c2, ...) == 0, as the other tests do
   }
 }
 
@@ -320,13 +338,13 @@ void structTests() {
     s2.s.c = 'a';
     s2.b   = true;
 
-    assert(memcmp(&s1, &s2, sizeof(S2)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S2)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.s.x == 4);
-    assert(s1.s.c == 'a');
-    assert(s1.b == true);
+    assert2(s1.s.x == 4);
+    assert2(s1.s.c == 'a');
+    assert2(s1.b == true);
 
-    assert(memcmp(&s1, &s2, sizeof(S2)) == 0);
+    assert2(memcmp(&s1, &s2, sizeof(S2)) == 0);
   }
 
   // struct with long double
@@ -345,11 +363,11 @@ void structTests() {
     s2.l = 3.0L;
     s2.b = true;
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.l == 3.0L);
-    assert(s1.b == true);
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.l == 3.0L);
+    assert2(s1.b == true);
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // EBO
@@ -371,11 +389,11 @@ void structTests() {
     s2.i = 4;
     s2.b = true;
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.i == 4);
-    assert(s1.b == true);
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.i == 4);
+    assert2(s1.b == true);
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // padding between bases
@@ -398,11 +416,11 @@ void structTests() {
     s2.c1 = 'a';
     s2.c2 = 'b';
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.c1 == 'a');
-    assert(s1.c2 == 'b');
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.c1 == 'a');
+    assert2(s1.c2 == 'b');
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // padding after last base
@@ -429,12 +447,12 @@ void structTests() {
     s2.c2 = 'b';
     s2.c3 = 'c';
 
-    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
-    assert(s1.c1 == 'a');
-    assert(s1.c2 == 'b');
-    assert(s1.c3 == 'c');
-    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(s1.c1 == 'a');
+    assert2(s1.c2 == 'b');
+    assert2(s1.c3 == 'c');
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // vtable
@@ -469,11 +487,11 @@ void structTests() {
     s1->z = true;
     s2->z = true;
     __builtin_clear_padding2(s2);
-    assert(s2->x == 0xFFFFFFFF);
-    assert(s2->y == 'a');
-    assert(s2->z == true);
-    assert(s2->call() == 5);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(s2->x == 0xFFFFFFFF);
+    assert2(s2->y == 'a');
+    assert2(s2->z == true);
+    assert2(s2->call() == 5);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // multiple bases with vtable
@@ -524,13 +542,13 @@ void structTests() {
     s1->z  = true;
     s2->z  = true;
     __builtin_clear_padding2(s2);
-    assert(s2->x1 == 0xFFFFFFFF);
-    assert(s2->x2 == 0xFAFAFAFA);
-    assert(s2->x3 == 0xAAAAAAAA);
-    assert(s2->y == 'a');
-    assert(s2->z == true);
-    assert(s2->call1() == 5);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(s2->x1 == 0xFFFFFFFF);
+    assert2(s2->x2 == 0xFAFAFAFA);
+    assert2(s2->x3 == 0xAAAAAAAA);
+    assert2(s2->y == 'a');
+    assert2(s2->z == true);
+    assert2(s2->call1() == 5);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // chain of bases with virtual functions
@@ -580,7 +598,7 @@ void structTests() {
     s1->z  = true;
     s2->z  = true;
     __builtin_clear_padding2(s2);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // virtual inheritance
@@ -621,7 +639,7 @@ void structTests() {
     s1->s  = true;
     s2->s  = true;
     __builtin_clear_padding2(s2);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    assert2(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // bit fields
@@ -644,11 +662,12 @@ void structTests() {
     s2.b2 = 27;
     s1.b3 = 3;
     s2.b3 = 3;
-    __builtin_clear_padding(&s2);
-    print_bytes(&s1);
-    print_bytes(&s2);
+    __builtin_clear_padding2(&s2);
+    //print_bytes(&s1);
+    //print_bytes(&s2);
+    //assert(false);
     //TODO
-    //assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+    //assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   testAllStructsForType<32, 16, char>(11, 22, 33, 44);
@@ -692,9 +711,9 @@ void unionTests() {
     u2.c = '4';
 
     __builtin_clear_padding2(&u1); // should have no effect
-    assert(u1.c == '4');
+    assert2(u1.c == '4');
 
-    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+    assert2(memcmp(&u1, &u2, sizeof(u)) == 0);
   }
 
   // tail padding of longest member
@@ -715,10 +734,10 @@ void unionTests() {
     u1.s1.c1 = '4';
     u2.s1.c1 = '4';
 
-    assert(memcmp(&u1, &u2, sizeof(u)) != 0);
+    assert2(memcmp(&u1, &u2, sizeof(u)) != 0);
     __builtin_clear_padding2(&u1);
-    assert(u1.s1.c1 == '4');
-    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+    assert2(u1.s1.c1 == '4');
+    assert2(memcmp(&u1, &u2, sizeof(u)) == 0);
   }
 }
 
@@ -729,9 +748,9 @@ void arrayTests() {
     int i2[2] = {1, 2};
 
     __builtin_clear_padding2(&i1);
-    assert(i1[0] == 1);
-    assert(i1[1] == 2);
-    assert(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
+    assert2(i1[0] == 1);
+    assert2(i1[1] == 2);
+    assert2(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
   }
 
   // long double
@@ -746,9 +765,9 @@ void arrayTests() {
     d2[1] = 4.0L;
 
     __builtin_clear_padding2(&d1);
-    assert(d1[0] == 3.0L);
-    assert(d2[1] == 4.0L);
-    assert(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
+    assert2(d1[0] == 3.0L);
+    assert2(d2[1] == 4.0L);
+    assert2(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
   }
 
   // struct
@@ -782,18 +801,18 @@ void arrayTests() {
     s2[1].i2 = 4;
     s2[1].c2 = 'd';
 
-    assert(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
+    assert2(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
     __builtin_clear_padding2(&s1);
 
-    assert(s1[0].i1 == 1);
-    assert(s1[0].c1 == 'a');
-    assert(s1[0].i2 == 2);
-    assert(s1[0].c2 == 'b');
-    assert(s1[1].i1 == 3);
-    assert(s1[1].c1 == 'c');
-    assert(s1[1].i2 == 4);
-    assert(s1[1].c2 == 'd');
-    assert(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
+    assert2(s1[0].i1 == 1);
+    assert2(s1[0].c1 == 'a');
+    assert2(s1[0].i2 == 2);
+    assert2(s1[0].c2 == 'b');
+    assert2(s1[1].i1 == 3);
+    assert2(s1[1].c1 == 'c');
+    assert2(s1[1].i2 == 4);
+    assert2(s1[1].c2 == 'd');
+    assert2(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
   }
 }
 

>From 1bdbbf2a03c11b7a20d3b0691ee72b467e050fa6 Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Sun, 23 Jun 2024 19:33:23 +0100
Subject: [PATCH 03/22] handle bit field

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 80 ++++++++++++++++---
 .../atomics/builtin_clear_padding.pass.cpp    | 26 +++---
 2 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 428c30bef94e1..78fc3cf025cd5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2931,20 +2931,76 @@ struct PaddingClearer {
     return Results;
   }
 
-  void ClearPadding(Value *Ptr, const BitInterval &PaddingInteval) {
-    // TODO: support clearning non-one-byte clearing
-    auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
-    auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
-    for (auto Offset = PaddingInteval.First / CharWidth;
-         Offset < PaddingInteval.Last / CharWidth; ++Offset) {
-      auto *Index = ConstantInt::get(CGF.IntTy, Offset);
-      auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-      CGF.Builder.CreateAlignedStore(
-          Zero, Element,
-          CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
-    }
+
+
+  // Zeroes the padding bits in the half-open bit interval
+  // [PaddingInterval.First, PaddingInterval.Last) of the object at Ptr.
+  // Fully covered bytes are stored as zero; a partially covered first or last
+  // byte is read, masked and written back so its non-padding bits are kept.
+  void ClearPadding(Value *Ptr, const BitInterval &PaddingInterval) {
+      auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+      auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+  
+      // Calculate byte indices and bit positions
+      auto StartByte = PaddingInterval.First / CharWidth;
+      auto StartBit = PaddingInterval.First % CharWidth;
+      auto EndByte = PaddingInterval.Last / CharWidth;
+      auto EndBit = PaddingInterval.Last % CharWidth;
+  
+      if (StartByte == EndByte) {
+          // Interval is within a single byte
+          auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+          auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+          Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+          auto *Byte = CGF.Builder.CreateLoad(ElementAddr);
+          // Bits [StartBit, EndBit) are padding. AND with the complement of
+          // that range so the padding is cleared and the other bits survive.
+          uint8_t PaddingBits = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
+          auto *MaskValue =
+              ConstantInt::get(CGF.Int8Ty, static_cast<uint8_t>(~PaddingBits));
+          auto *NewValue = CGF.Builder.CreateAnd(Byte, MaskValue);
+          CGF.Builder.CreateStore(NewValue, ElementAddr);
+      } else {
+          // Handle the start byte
+          if (StartBit != 0) {
+              auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+              auto *Byte = CGF.Builder.CreateLoad(ElementAddr);
+              // Bits [StartBit, CharWidth) of the first byte are padding.
+              uint8_t PaddingBits = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
+              auto *MaskValue = ConstantInt::get(
+                  CGF.Int8Ty, static_cast<uint8_t>(~PaddingBits));
+              auto *NewValue = CGF.Builder.CreateAnd(Byte, MaskValue);
+              CGF.Builder.CreateStore(NewValue, ElementAddr);
+              ++StartByte;
+          }
+  
+          // Handle full bytes in the middle
+          for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
+              auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+              CGF.Builder.CreateStore(Zero, ElementAddr);
+          }
+  
+          // Handle the end byte
+          if (EndBit != 0) {
+              auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
+              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+              auto *Byte = CGF.Builder.CreateLoad(ElementAddr);
+              // Bits [0, EndBit) of the last byte are padding; keep the rest.
+              uint8_t PaddingBits = (1 << EndBit) - 1;
+              auto *MaskValue = ConstantInt::get(
+                  CGF.Int8Ty, static_cast<uint8_t>(~PaddingBits));
+              auto *NewValue = CGF.Builder.CreateAnd(Byte, MaskValue);
+              CGF.Builder.CreateStore(NewValue, ElementAddr);
+          }
+      }
+  }
 
+
   CodeGenFunction &CGF;
   const uint64_t CharWidth;
   std::deque<Data> Queue;
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 49c57b1473447..ec220088ac1be 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -29,12 +29,12 @@ void print_bytes(const T* object) {
 
 template <class T>
 void __builtin_clear_padding2(T t) {
-  __builtin_clear_padding(t);
+  //__builtin_clear_padding(t);
   (void)t;
 }
 
 void assert2(bool b){
-  assert(b);
+  //assert(b);
   (void)b;
 }
 
@@ -647,9 +647,9 @@ void structTests() {
     struct S {
       // will usually occupy 2 bytes:
       unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
-      unsigned char    : 2; // next 2 bits (in 1st byte) are blocked out as unused
-      unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd
-      unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte
+      unsigned char b2 : 2; // next 2 bits (in 1st byte) are b2
+      unsigned char b3 : 6; // 6 bits for b3 - doesn't fit into the 1st byte => starts a 2nd
+      unsigned char b4 : 2; // 2 bits for b4 - next (and final) bits in the 2nd byte
     };
 
     S s1, s2;
@@ -658,16 +658,14 @@ void structTests() {
 
     s1.b1 = 5;
     s2.b1 = 5;
-    s1.b2 = 27;
-    s2.b2 = 27;
-    s1.b3 = 3;
-    s2.b3 = 3;
+    s1.b2 = 3;
+    s2.b2 = 3;
+    s1.b3 = 27;
+    s2.b3 = 27;
+    s1.b4 = 3;
+    s2.b4 = 3;
     __builtin_clear_padding2(&s2);
-    //print_bytes(&s1);
-    //print_bytes(&s2);
-    //assert(false);
-    //TODO
-    //assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   testAllStructsForType<32, 16, char>(11, 22, 33, 44);

>From 9f7854022a0e42761a68c2afa1b95ca84b925187 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 25 May 2025 09:38:34 +0100
Subject: [PATCH 04/22] rebase

---
 clang/include/clang/Basic/Builtins.td | 1 +
 clang/lib/CodeGen/CGBuiltin.cpp       | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index e881fda614330..00f5cb8a1d01d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1261,6 +1261,7 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_is_within_lifetime"];
   let Attributes = [NoThrow, CustomTypeChecking, Consteval];
   let Prototype = "bool(void*)";
+}
 
 def ClearPadding : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_clear_padding"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 78fc3cf025cd5..c59e31dd6eb32 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2855,12 +2855,12 @@ struct PaddingClearer {
       if (Field->isBitField()) {
         llvm::dbgs() << "clear_padding found bit field. Adding Interval ["
                      << StartBitOffset + FieldOffset << " , "
-                     << FieldOffset + Field->getBitWidthValue(CGF.getContext())
+                     << FieldOffset + Field->getBitWidthValue()
                      << ")\n";
         OccuppiedIntervals.push_back(
             BitInterval{StartBitOffset + FieldOffset,
                         StartBitOffset + FieldOffset +
-                            Field->getBitWidthValue(CGF.getContext())});
+                            Field->getBitWidthValue()});
       } else {
         Queue.push_back(
             Data{StartBitOffset + FieldOffset, Field->getType(), true});

>From 3baab9297d2e5f5225d6b19edc2f54e808a1b5d1 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 31 May 2025 11:38:29 +0100
Subject: [PATCH 05/22] review feedback

---
 clang/include/clang/Basic/Builtins.td         |   2 +-
 clang/lib/CodeGen/CGBuiltin.cpp               |  35 +-
 .../builtin-clear-padding-codegen.cpp         | 800 ++++++++++++++++--
 3 files changed, 760 insertions(+), 77 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 00f5cb8a1d01d..f2464add00acb 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1265,7 +1265,7 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
 
 def ClearPadding : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_clear_padding"];
-  let Attributes = [NoThrow];
+  let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "void(void*)";
 }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c59e31dd6eb32..0b88a0d32f974 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2706,6 +2706,14 @@ RValue CodeGenFunction::emitStdcFirstBit(const CallExpr *E, Intrinsic::ID IntID,
 
 namespace {
 
+
+// PaddingClearer is a utility class that clears padding bits in a
+// C++ type. It traverses the type recursively, collecting occupied
+// bit intervals, and then computes the padding intervals.
+// In the end, it clears the padding bits by writing zeros
+// to the padding intervals byte by byte. If a byte only contains
+// some padding bits, it writes zeros to only those bits. This is
+// the case for bit-fields.
 struct PaddingClearer {
   PaddingClearer(CodeGenFunction &F)
       : CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}
@@ -2716,8 +2724,8 @@ struct PaddingClearer {
 
     Queue.push_back(Data{0, Ty, true});
     while (!Queue.empty()) {
-      auto Current = Queue.front();
-      Queue.pop_front();
+      auto Current = Queue.back();
+      Queue.pop_back();
       Visit(Current);
     }
 
@@ -2799,7 +2807,7 @@ struct PaddingClearer {
 
       Queue.push_back(
           Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
-               ElementQualType, true});
+               ElementQualType, /*VisitVirtualBase*/true});
     }
   }
 
@@ -2832,8 +2840,8 @@ struct PaddingClearer {
 
       llvm::dbgs() << "visiting base at offset " << StartBitOffset << " + "
                    << BaseOffset * CharWidth << '\n';
-      Queue.push_back(
-          Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), false});
+      Queue.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
+                           Base.getType(), /*VisitVirtualBase*/ false});
     };
 
     for (auto Base : R->bases()) {
@@ -2862,8 +2870,8 @@ struct PaddingClearer {
                         StartBitOffset + FieldOffset +
                             Field->getBitWidthValue()});
       } else {
-        Queue.push_back(
-            Data{StartBitOffset + FieldOffset, Field->getType(), true});
+        Queue.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
+                             /*VisitVirtualBase*/ true});
       }
     }
   }
@@ -2878,9 +2886,10 @@ struct PaddingClearer {
                  << StartBitOffset << "Img from "
                  << StartBitOffset + ImgOffset.getQuantity() * CharWidth
                  << "\n";
-    Queue.push_back(Data{StartBitOffset, ElementQualType, true});
+    Queue.push_back(
+        Data{StartBitOffset, ElementQualType, /*VisitVirtualBase*/ true});
     Queue.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
-                         ElementQualType, true});
+                         ElementQualType, /*VisitVirtualBase*/ true});
   }
 
   void MergeOccuppiedIntervals() {
@@ -5501,12 +5510,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Ptr);
   }
   case Builtin::BI__builtin_clear_padding: {
-    const Expr *Op = E->getArg(0);
-    Value *Address = EmitScalarExpr(Op);
-    auto PointeeTy = Op->getType()->getPointeeType();
+    Address Src = EmitPointerWithAlignment(E->getArg(0));
+    auto PointeeTy = E->getArg(0)->getType()->getPointeeType();
     PaddingClearer clearer{*this};
-    clearer.run(Address, PointeeTy);
-    //RecursivelyClearPadding(*this, Address, PointeeTy);
+    clearer.run(Src.getBasePointer(), PointeeTy);
     return RValue::get(nullptr);
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index 54455e6699849..caea427d1d4dd 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -1,4 +1,158 @@
-// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+
+struct Empty {};
+
+// CHECK-LABEL: define dso_local void @_Z9testEmptyP5Empty(
+// CHECK-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    ret void
+//
+void testEmpty(Empty *e) {
+  // This should clear the one byte that Empty occupies.
+  __builtin_clear_padding(e);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
+// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void testPrimitiveNoPadding(int *i) {
+  // This should not clear any padding, since int has no padding.
+  __builtin_clear_padding(i);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
+// CHECK-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPrimitiveLongDouble(long double *ld) {
+  // padding [10, 15] on x86
+  __builtin_clear_padding(ld);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
+// CHECK-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPrimitiveComplexLongDouble(_Complex long double *c) {
+  // padding [10, 15] and [26, 31] on x86
+  __builtin_clear_padding(c);
+}
+
+union U1 {
+  int i;
+  char c;
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
+// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    ret void
+//
+void testUnionDifferentLength(U1 *u) {
+  // This should not clear the object representation bits of the non-active member.
+  __builtin_clear_padding(u);
+}
+
+
+struct S {
+  alignas(8) char c1;
+};
+
+union U2 {
+  S s1;
+  char c2;
+};
+
+// CHECK-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
+// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    ret void
+//
+void testUnionTailPaddingOfLongestMember(U2 *u) {
+  // This should clear the tail padding of the longest member.
+  // [1 - 7]
+  __builtin_clear_padding(u);
+}
+
 
 struct alignas(4) Foo {
   char a;
@@ -21,34 +175,57 @@ struct alignas(4) Baz : Foo {
 // %struct.Foo = type { i8, i8, i8, i8 }
 // %struct.Bar = type { i8, i8, i8, i8 }
 
-// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
-// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
-// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
-// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
-// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
-
-// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
-// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
-// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_1]]
-// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
-// CHECK: store i8 0, i8* [[PAD_2]]
-
-// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
-// CHECK: store i8 0, i8* [[PAD_3]]
-// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
-// CHECK: store i8 0, i8* [[PAD_4]]
-// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
-// CHECK: store i8 0, i8* [[PAD_5]]
-
-// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
-// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
-// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_6]]
-// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
-// CHECK: store i8 0, i8* [[PAD_7]]
-// CHECK: ret void
-void testBaz(Baz *baz) {
+// CHECK-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
+// CHECK-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructPaddingInBetweenMembers(Baz *baz) {
+  // this should clear all the padding in between various members
+  __builtin_clear_padding(baz);
+}
+
+// CHECK-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
+// CHECK-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructVolatile(volatile Baz *baz) {
+  // this should clear all the padding in between various members
   __builtin_clear_padding(baz);
 }
 
@@ -63,24 +240,525 @@ struct UnsizedTail {
 // "size", PAD_1, PAD_2, PAD_3, PAD_4
 // %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
 
-// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
-// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
-// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
-// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
-// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
-// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
-// CHECK: store i8 0, i8* [[PAD_1]]
-// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
-// CHECK: store i8 0, i8* [[PAD_2]]
-// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
-// CHECK: store i8 0, i8* [[PAD_3]]
-// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
-// CHECK: store i8 0, i8* [[PAD_4]]
-// CHECK: ret void
-void testUnsizedTail(UnsizedTail *u) {
+// CHECK-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
+// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructUnsizedTail(UnsizedTail *u) {
   __builtin_clear_padding(u);
 }
 
+class S1 {
+  int x;
+  char c;
+};
+
+class S2 {
+  [[no_unique_address]] S1 s1;
+  bool b;
+};
+
+// CHECK-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testNoUniqueAddress(S2 *s) {
+  // "x" [0-3], "c" [4], "b" [5], PAD [6-7]
+  __builtin_clear_padding(s);
+}
+
+struct S3 {
+  long double l;
+  bool b;
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// CHECK-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// CHECK-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// CHECK-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// CHECK-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// CHECK-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// CHECK-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructWithLongDouble(S3 *s) {
+  // "l" [0-9], PAD [10-15], "b" [16], PAD [17-31]
+  __builtin_clear_padding(s);
+}
+
+struct B {
+  int i;
+};
+struct S4 : Empty, B {
+  bool b;
+};
+
+// CHECK-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    ret void
+//
+void testStructWithEmptyBase(S4 *s) {
+  // "i" [0-3], "b" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+struct B1 {
+  char c1;
+};
+struct B2 {
+  alignas(4) char c2;
+};
+struct S5 : B1, B2 {
+};
+
+// CHECK-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPaddingBetweenBases(S5 *s) {
+  // "c1" [0], PAD [1-3] , "c2" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+struct B3 {
+  char c1;
+};
+
+struct B4 {
+  char c2;
+};
+
+struct S6 : B3, B4 {
+  alignas(4) char c3;
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    ret void
+//
+void testPaddingAfterLastBase(S6 *s) {
+  // "c1"[0], "c2"[1], PAD [2-3], "c3" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+
+struct VirtualBase {
+  unsigned int x;
+  virtual int call() { return x; };
+  virtual ~VirtualBase() = default;
+};
+
+struct NonVirtualBase {
+  char y;
+};
+
+struct S7 : VirtualBase, NonVirtualBase {
+  virtual int call() override { return 5; }
+  bool z;
+};
+
+// CHECK-LABEL: define dso_local void @_Z10testVtableP2S7(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testVtable(S7 *s) {
+  // "vtable ptr" [0-7], "x" [8-11], "y" [12], "z" [13], PAD [14-15]
+  __builtin_clear_padding(s);
+}
+
+struct VirtualBase1 {
+  unsigned int x1;
+  virtual int call1() { return x1; };
+  virtual ~VirtualBase1() = default;
+};
+
+struct VirtualBase2 {
+  unsigned int x2;
+  virtual int call2() { return x2; };
+  virtual ~VirtualBase2() = default;
+};
+
+struct VirtualBase3 {
+  unsigned int x3;
+  virtual int call3() { return x3; };
+  virtual ~VirtualBase3() = default;
+};
+
+struct NonVirtualBase1 {
+  char y;
+};
+struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 {
+  virtual int call1() override { return 5; }
+  bool z;
+};
+
+// CHECK-LABEL: define dso_local void @_Z23testMultipleBasesVtableP2S8(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    ret void
+//
+void testMultipleBasesVtable(S8 *s) {
+  // "vtable ptr" [0-7], "x1" [8-11], PAD [12-15],
+  // "vtable ptr" [16-23], "x2" [24-27], "y" [28], PAD [29-31],
+  // "vtable ptr" [32-39], "x3" [40-43], "z" [44], PAD [45-47]
+  __builtin_clear_padding(s);
+}
+
+struct VirtualChain1 {
+  unsigned int x1;
+  virtual int call1() { return x1; };
+  virtual ~VirtualChain1() = default;
+};
+
+struct VirtualChain2 : VirtualChain1 {
+  unsigned int x2;
+  virtual int call2() { return x2; };
+  virtual ~VirtualChain2() = default;
+};
+
+struct VirtualChain3 : VirtualChain2 {
+  unsigned int x3;
+  virtual int call3() { return x3; };
+  virtual ~VirtualChain3() = default;
+};
+
+struct NonVirtualBase2 {
+  char y;
+};
+
+struct S9 : NonVirtualBase2, VirtualChain3 {
+  bool z;
+};
+
+// CHECK-LABEL: define dso_local void @_Z16testVirtualChainP2S9(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testVirtualChain(S9 *s) {
+  // This should clear the padding after the bool z.
+  // The bases are reordered: VirtualChain3 is laid out before NonVirtualBase2.
+  // "vtable ptr" [0-7], "x1" [8-11], "x2" [12-15], "x3" [16-19],
+  // "y" [20], "z" [21], PAD [22-23]
+  __builtin_clear_padding(s);
+}
+
+
+struct Base {
+  int x;
+};
+
+struct D1 : virtual Base {
+  int d1;
+  bool b1;
+};
+struct D2 : virtual Base {
+  int d2;
+  bool b2;
+};
+
+struct S10 : D1, D2 {
+  bool s;
+};
+
+// CHECK-LABEL: define dso_local void @_Z22testVirtualInheritanceP3S10(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    ret void
+//
+void testVirtualInheritance(S10 *s) {
+  // note derived member placed before the virtual base
+  // "vtable ptr" [0-7],  "d1" [8-11], "b1" [12], PAD [13-15],
+  // "vtable ptr" [16-23],  "d2" [24-27], "b2" [28], s [29],  PAD [30-31],
+  // "x" [32-35], PAD [36-39]
+  __builtin_clear_padding(s);
+}
+
+struct S11 {
+  // will usually occupy 2 bytes:
+  unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
+  unsigned char b2 : 2; // next 2 bits (in 1st byte). The remaining bits of the 1st byte are unused
+  unsigned char b3 : 6; // 6 bits for b3 - doesn't fit into the 1st byte => starts a 2nd
+  unsigned char b4 : 2; // 2 bits for b4 - next (and final) bits in the 2nd byte
+};
+
+// CHECK-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
+// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// CHECK-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// CHECK-NEXT:    ret void
+//
+void testBitFields(S11 *s) {
+  // "b1" [0-2], "b2" [3-4], PAD [5-7], "b3" [8-13], "b4" [14-15]
+  // to clear 5-7, we should AND 0b00011111 (31)
+  __builtin_clear_padding(s);
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// CHECK-NEXT:    ret void
+//
+void testArrayNoPadding() {
+  int i[4];
+  // there is no padding in the array.
+  __builtin_clear_padding(&i);
+}
+
+// CHECK-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
+// CHECK-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// CHECK-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    ret void
+//
+void testArrayLongDouble(long double (&arr)[2]) {
+  // long double 0, [0-9] PAD [10-15]
+  // long double 1, [16-25] PAD [26-31]
+  long double ld[2];
+  __builtin_clear_padding(&ld);
+}
+
+// CHECK-LABEL: define dso_local void @_Z17testArrayOfStructv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S:%.*]] = alloca [2 x %struct.S.0], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// CHECK-NEXT:    ret void
+//
+void testArrayOfStruct() {
+  struct S {
+    int i1;
+    char c1;
+    int i2;
+    char c2;
+  };
+
+  // S[0].i1 [0-3], S[0].c1 [4], PAD [5-7],
+  // S[0].i2 [8-11], S[0].c2 [12], PAD [13-15],
+  // S[1].i1 [16-19], S[1].c1 [20], PAD [21-23],
+  // S[1].i2 [24-27], S[1].c2 [28], PAD [29-31]
+
+  S s[2];
+  __builtin_clear_padding(&s);
+}
+
 struct ArrOfStructsWithPadding {
   Bar bars[2];
 };
@@ -89,24 +767,22 @@ struct ArrOfStructsWithPadding {
 // "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
 // %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
 
-// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
-// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
-// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
-// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
-// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
-// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
-// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
-// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_1]]
-// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
-// CHECK: store i8 0, i8* [[PAD_2]]
-// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
-// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
-// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
-// CHECK: store i8 0, i8* [[PAD_3]]
-// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
-// CHECK: store i8 0, i8* [[PAD_4]]
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
+// CHECK-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// CHECK-NEXT:    ret void
+//
 void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
   __builtin_clear_padding(arr);
 }

>From 5d74e88c1bdaaab0d6e4e838e66809f1e9925f82 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 15:25:29 +0000
Subject: [PATCH 06/22] rebase

---
 clang/lib/CodeGen/CGBuiltin.cpp                         | 3 ---
 clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0b88a0d32f974..148c83314003b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -35,14 +35,11 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ScopedPrinter.h"
-#include "llvm/TargetParser/AArch64TargetParser.h"
-#include "llvm/TargetParser/X86TargetParser.h"
 #include <algorithm>
 #include <optional>
 #include <utility>
 #include <deque>
 #include <vector>
-#include <sstream>
 
 using namespace clang;
 using namespace CodeGen;
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index caea427d1d4dd..b9ce468b5aa64 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -15,7 +15,7 @@ struct Empty {};
 // CHECK-NEXT:    ret void
 //
 void testEmpty(Empty *e) {
-  // This should clear the one byte that Emtpy occupies.
+  // This should clear the one byte that Empty occupies.
   __builtin_clear_padding(e);
 }
 

>From 57103a4ed0562e082a8190cd3a81a95fe3af2085 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 15:30:17 +0000
Subject: [PATCH 07/22] clang format

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 142 +++++++++---------
 .../atomics/builtin_clear_padding.pass.cpp    |   6 +-
 2 files changed, 71 insertions(+), 77 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 148c83314003b..6631bc5d4d5a1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -36,9 +36,9 @@
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include <algorithm>
+#include <deque>
 #include <optional>
 #include <utility>
-#include <deque>
 #include <vector>
 
 using namespace clang;
@@ -2703,7 +2703,6 @@ RValue CodeGenFunction::emitStdcFirstBit(const CallExpr *E, Intrinsic::ID IntID,
 
 namespace {
 
-
 // PaddingClearer is a utility class that clears padding bits in a
 // c++ type. It traverses the type recursively, collecting occupied
 // bit intervals, and then compute the padding intervals.
@@ -2804,7 +2803,7 @@ struct PaddingClearer {
 
       Queue.push_back(
           Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
-               ElementQualType, /*VisitVirtualBase*/true});
+               ElementQualType, /*VisitVirtualBase*/ true});
     }
   }
 
@@ -2860,12 +2859,10 @@ struct PaddingClearer {
       if (Field->isBitField()) {
         llvm::dbgs() << "clear_padding found bit field. Adding Interval ["
                      << StartBitOffset + FieldOffset << " , "
-                     << FieldOffset + Field->getBitWidthValue()
-                     << ")\n";
-        OccuppiedIntervals.push_back(
-            BitInterval{StartBitOffset + FieldOffset,
-                        StartBitOffset + FieldOffset +
-                            Field->getBitWidthValue()});
+                     << FieldOffset + Field->getBitWidthValue() << ")\n";
+        OccuppiedIntervals.push_back(BitInterval{
+            StartBitOffset + FieldOffset,
+            StartBitOffset + FieldOffset + Field->getBitWidthValue()});
       } else {
         Queue.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
                              /*VisitVirtualBase*/ true});
@@ -2937,75 +2934,72 @@ struct PaddingClearer {
     return Results;
   }
 
+  void ClearPadding(Value *Ptr, const BitInterval &PaddingInterval) {
+    auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+    auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
 
+    // Split the half-open bit range [First, Last) into byte and bit offsets
+    auto StartByte = PaddingInterval.First / CharWidth;
+    auto StartBit = PaddingInterval.First % CharWidth;
+    auto EndByte = PaddingInterval.Last / CharWidth;
+    auto EndBit = PaddingInterval.Last % CharWidth;
 
-  void ClearPadding(Value *Ptr, const BitInterval &PaddingInterval) {
-      auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
-      auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
-  
-      // Calculate byte indices and bit positions
-      auto StartByte = PaddingInterval.First / CharWidth;
-      auto StartBit = PaddingInterval.First % CharWidth;
-      auto EndByte = PaddingInterval.Last / CharWidth;
-      auto EndBit = PaddingInterval.Last % CharWidth;
-  
-      if (StartByte == EndByte) {
-          // Interval is within a single byte
-          auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
-          auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-          Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
-  
-          auto *Value = CGF.Builder.CreateLoad(ElementAddr);
-  
-          // Create mask to clear bits within the byte
-          uint8_t mask = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
-          auto *MaskValue = ConstantInt::get(CGF.Int8Ty, mask);
-          auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
-  
-          CGF.Builder.CreateStore(NewValue, ElementAddr);
-      } else {
-          // Handle the start byte
-          if (StartBit != 0) {
-              auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
-              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
-  
-              auto *Value = CGF.Builder.CreateLoad(ElementAddr);
-  
-              uint8_t startMask = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
-              auto *MaskValue = ConstantInt::get(CGF.Int8Ty, ~startMask);
-              auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
-  
-              CGF.Builder.CreateStore(NewValue, ElementAddr);
-              ++StartByte;
-          }
-  
-          // Handle full bytes in the middle
-          for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
-              auto *Index = ConstantInt::get(CGF.IntTy, Offset);
-              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
-  
-              CGF.Builder.CreateStore(Zero, ElementAddr);
-          }
-  
-          // Handle the end byte
-          if (EndBit != 0) {
-              auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
-              auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-              Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
-  
-              auto *Value = CGF.Builder.CreateLoad(ElementAddr);
-  
-              uint8_t endMask = (1 << EndBit) - 1;
-              auto *MaskValue = ConstantInt::get(CGF.Int8Ty, endMask);
-              auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
-  
-              CGF.Builder.CreateStore(NewValue, ElementAddr);
-          }
+    if (StartByte == EndByte) {
+      // Interval is within a single byte
+      auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+      auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+      Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+
+      auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+
+      // Keep every bit outside [StartBit, EndBit); the AND zeroes the padding
+      uint8_t mask = ~(((1 << EndBit) - 1) & ~((1 << StartBit) - 1));
+      auto *MaskValue = ConstantInt::get(CGF.Int8Ty, mask);
+      auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+
+      CGF.Builder.CreateStore(NewValue, ElementAddr);
+    } else {
+      // Handle the start byte: keep bits [0, StartBit), clear the rest
+      if (StartBit != 0) {
+        auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+        auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+        Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+
+        auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+
+        uint8_t startMask = (1 << StartBit) - 1;
+        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, startMask);
+        auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+
+        CGF.Builder.CreateStore(NewValue, ElementAddr);
+        ++StartByte;
+      }
+
+      // Handle full bytes in the middle
+      for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
+        auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+        auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+        Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+
+        CGF.Builder.CreateStore(Zero, ElementAddr);
       }
-  }
 
+      // Handle the end byte: clear bits [0, EndBit), keep the rest
+      if (EndBit != 0) {
+        auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
+        auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+        Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+
+        auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+
+        uint8_t endMask = ~((1 << EndBit) - 1);
+        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, endMask);
+        auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+
+        CGF.Builder.CreateStore(NewValue, ElementAddr);
+      }
+    }
+  }
 
   CodeGenFunction &CGF;
   const uint64_t CharWidth;
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index ec220088ac1be..163ffb77ed988 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -33,7 +33,7 @@ void __builtin_clear_padding2(T t) {
   (void)t;
 }
 
-void assert2(bool b){
+void assert2(bool b) {
   //assert(b);
   (void)b;
 }
@@ -307,8 +307,8 @@ void primitiveTests() {
 
     memset(&c1, 42, sizeof(_Complex long double));
     memset(&c2, 0, sizeof(_Complex long double));
-    c1 = 3.0L ;
-    c1 = 3.0L ;
+    c1 = 3.0L;
+    c1 = 3.0L;
     __builtin_clear_padding2(&c1);
     //TODO
   }

>From e95e1866057fa980dd8c5b0e59217f96eccdd13c Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 16:29:54 +0000
Subject: [PATCH 08/22] cxx test

---
 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 163ffb77ed988..2f6c83411f57f 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 // UNSUPPORTED: c++03
+// UNSUPPORTED: gcc
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
 

>From 740c58985a8af360a17d1af34adaac54c7a9f8a7 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 16:35:37 +0000
Subject: [PATCH 09/22] cxx test

---
 .../atomics/builtin_clear_padding.pass.cpp    | 239 ++++++++----------
 1 file changed, 108 insertions(+), 131 deletions(-)

diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 2f6c83411f57f..8698cd26d2607 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 // UNSUPPORTED: c++03
 // UNSUPPORTED: gcc
+// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
 
@@ -15,30 +16,6 @@
 #include <cstring>
 #include <new>
 
-template <class T>
-void print_bytes(const T* object) {
-  auto size                        = sizeof(T);
-  const unsigned char* const bytes = reinterpret_cast<const unsigned char*>(object);
-  size_t i;
-
-  fprintf(stderr, "[ ");
-  for (i = 0; i < size; i++) {
-    fprintf(stderr, "%02x ", bytes[i]);
-  }
-  fprintf(stderr, "]\n");
-}
-
-template <class T>
-void __builtin_clear_padding2(T t) {
-  //__builtin_clear_padding(t);
-  (void)t;
-}
-
-void assert2(bool b) {
-  //assert(b);
-  (void)b;
-}
-
 template <size_t A1, size_t A2, class T>
 struct alignas(A1) BasicWithPadding {
   T x;
@@ -105,9 +82,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&basic2, 42, sizeof(B));
     basic2.x = a;
     basic2.y = b;
-    assert2(memcmp(&basic1, &basic2, sizeof(B)) != 0);
-    __builtin_clear_padding2(&basic2);
-    assert2(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+    assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+    __builtin_clear_padding(&basic2);
+    assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
   }
 
   // spaced array
@@ -126,9 +103,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     arr2.y[0] = c;
     arr2.y[1] = d;
     arr2.c    = 0;
-    assert2(memcmp(&arr1, &arr2, sizeof(A)) != 0);
-    __builtin_clear_padding2(&arr2);
-    assert2(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+    assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+    __builtin_clear_padding(&arr2);
+    assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
   }
 
   // pointer members
@@ -142,9 +119,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&ptr2, 42, sizeof(P));
     ptr2.x = &a;
     ptr2.y = &b;
-    assert2(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
-    __builtin_clear_padding2(&ptr2);
-    assert2(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+    __builtin_clear_padding(&ptr2);
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
   }
 
   // three members
@@ -160,8 +137,8 @@ void testAllStructsForType(T a, T b, T c, T d) {
     three2.x = a;
     three2.y = b;
     three2.z = c;
-    __builtin_clear_padding2(&three2);
-    assert2(memcmp(&three1, &three2, sizeof(Three)) == 0);
+    __builtin_clear_padding(&three2);
+    assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
   }
 
   // Normal struct no padding
@@ -175,8 +152,8 @@ void testAllStructsForType(T a, T b, T c, T d) {
     memset(&normal2, 42, sizeof(N));
     normal2.a = a;
     normal2.b = b;
-    __builtin_clear_padding2(&normal2);
-    assert2(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+    __builtin_clear_padding(&normal2);
+    assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
   }
 
   // base class
@@ -196,9 +173,9 @@ void testAllStructsForType(T a, T b, T c, T d) {
     base2.x = c;
     base2.y = d;
     base2.z = a;
-    assert2(memcmp(&base1, &base2, sizeof(H)) != 0);
-    __builtin_clear_padding2(&base2);
-    assert2(memcmp(&base1, &base2, sizeof(H)) == 0);
+    assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+    __builtin_clear_padding(&base2);
+    assert(memcmp(&base1, &base2, sizeof(H)) == 0);
   }
 }
 
@@ -227,10 +204,10 @@ void otherStructTests() {
     u2->buf[1] = 2;
     u2->buf[2] = 3;
     u2->buf[3] = 4;
-    assert2(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
-    __builtin_clear_padding2(u2);
+    assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+    __builtin_clear_padding(u2);
 
-    assert2(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+    assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
   }
 
   // basic padding on the heap
@@ -244,9 +221,9 @@ void otherStructTests() {
     memset(basic2, 42, sizeof(B));
     basic2->x = 1;
     basic2->y = 2;
-    assert2(memcmp(basic1, basic2, sizeof(B)) != 0);
-    __builtin_clear_padding2(basic2);
-    assert2(memcmp(basic1, basic2, sizeof(B)) == 0);
+    assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+    __builtin_clear_padding(basic2);
+    assert(memcmp(basic1, basic2, sizeof(B)) == 0);
     delete basic2;
     delete basic1;
   }
@@ -262,10 +239,10 @@ void otherStructTests() {
     memset(basic4, 42, sizeof(B));
     basic4->x = 1;
     basic4->y = 2;
-    assert2(memcmp(basic3, basic4, sizeof(B)) != 0);
-    __builtin_clear_padding2(const_cast<volatile B*>(basic4));
-    __builtin_clear_padding2(basic4);
-    assert2(memcmp(basic3, basic4, sizeof(B)) == 0);
+    assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+    __builtin_clear_padding(const_cast<volatile B*>(basic4));
+    __builtin_clear_padding(basic4);
+    assert(memcmp(basic3, basic4, sizeof(B)) == 0);
     delete basic4;
     delete basic3;
   }
@@ -283,9 +260,9 @@ void primitiveTests() {
   // no padding
   {
     int i1 = 42, i2 = 42;
-    __builtin_clear_padding2(&i1); // does nothing
-    assert2(i1 == 42);
-    assert2(memcmp(&i1, &i2, sizeof(int)) == 0);
+    __builtin_clear_padding(&i1); // does nothing
+    assert(i1 == 42);
+    assert(memcmp(&i1, &i2, sizeof(int)) == 0);
   }
 
   // long double
@@ -297,9 +274,9 @@ void primitiveTests() {
     d1 = 3.0L;
     d2 = 3.0L;
 
-    __builtin_clear_padding2(&d1);
-    assert2(d1 == 3.0L);
-    assert2(memcmp(&d1, &d2, sizeof(long double)) == 0);
+    __builtin_clear_padding(&d1);
+    assert(d1 == 3.0L);
+    assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
   }
 
   // _Complex
@@ -310,7 +287,7 @@ void primitiveTests() {
     memset(&c2, 0, sizeof(_Complex long double));
     c1 = 3.0L;
     c1 = 3.0L;
-    __builtin_clear_padding2(&c1);
+    __builtin_clear_padding(&c1);
     //TODO
   }
 }
@@ -339,13 +316,13 @@ void structTests() {
     s2.s.c = 'a';
     s2.b   = true;
 
-    assert2(memcmp(&s1, &s2, sizeof(S2)) != 0);
-    __builtin_clear_padding2(&s1);
-    assert2(s1.s.x == 4);
-    assert2(s1.s.c == 'a');
-    assert2(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S2)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.s.x == 4);
+    assert(s1.s.c == 'a');
+    assert(s1.b == true);
 
-    assert2(memcmp(&s1, &s2, sizeof(S2)) == 0);
+    assert(memcmp(&s1, &s2, sizeof(S2)) == 0);
   }
 
   // struct with long double
@@ -364,11 +341,11 @@ void structTests() {
     s2.l = 3.0L;
     s2.b = true;
 
-    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
-    __builtin_clear_padding2(&s1);
-    assert2(s1.l == 3.0L);
-    assert2(s1.b == true);
-    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.l == 3.0L);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // EBO
@@ -390,11 +367,11 @@ void structTests() {
     s2.i = 4;
     s2.b = true;
 
-    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
-    __builtin_clear_padding2(&s1);
-    assert2(s1.i == 4);
-    assert2(s1.b == true);
-    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.i == 4);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // padding between bases
@@ -417,11 +394,11 @@ void structTests() {
     s2.c1 = 'a';
     s2.c2 = 'b';
 
-    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
-    __builtin_clear_padding2(&s1);
-    assert2(s1.c1 == 'a');
-    assert2(s1.c2 == 'b');
-    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // padding after last base
@@ -448,12 +425,12 @@ void structTests() {
     s2.c2 = 'b';
     s2.c3 = 'c';
 
-    assert2(memcmp(&s1, &s2, sizeof(S)) != 0);
-    __builtin_clear_padding2(&s1);
-    assert2(s1.c1 == 'a');
-    assert2(s1.c2 == 'b');
-    assert2(s1.c3 == 'c');
-    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(s1.c3 == 'c');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // vtable
@@ -487,12 +464,12 @@ void structTests() {
     s2->y = 'a';
     s1->z = true;
     s2->z = true;
-    __builtin_clear_padding2(s2);
-    assert2(s2->x == 0xFFFFFFFF);
-    assert2(s2->y == 'a');
-    assert2(s2->z == true);
-    assert2(s2->call() == 5);
-    assert2(memcmp(s1, s2, sizeof(S)) == 0);
+    __builtin_clear_padding(s2);
+    assert(s2->x == 0xFFFFFFFF);
+    assert(s2->y == 'a');
+    assert(s2->z == true);
+    assert(s2->call() == 5);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // multiple bases with vtable
@@ -542,14 +519,14 @@ void structTests() {
     s2->y  = 'a';
     s1->z  = true;
     s2->z  = true;
-    __builtin_clear_padding2(s2);
-    assert2(s2->x1 == 0xFFFFFFFF);
-    assert2(s2->x2 == 0xFAFAFAFA);
-    assert2(s2->x3 == 0xAAAAAAAA);
-    assert2(s2->y == 'a');
-    assert2(s2->z == true);
-    assert2(s2->call1() == 5);
-    assert2(memcmp(s1, s2, sizeof(S)) == 0);
+    __builtin_clear_padding(s2);
+    assert(s2->x1 == 0xFFFFFFFF);
+    assert(s2->x2 == 0xFAFAFAFA);
+    assert(s2->x3 == 0xAAAAAAAA);
+    assert(s2->y == 'a');
+    assert(s2->z == true);
+    assert(s2->call1() == 5);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // chain of bases with virtual functions
@@ -598,8 +575,8 @@ void structTests() {
     s2->y  = 'a';
     s1->z  = true;
     s2->z  = true;
-    __builtin_clear_padding2(s2);
-    assert2(memcmp(s1, s2, sizeof(S)) == 0);
+    __builtin_clear_padding(s2);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // virtual inheritance
@@ -639,8 +616,8 @@ void structTests() {
     s2->b2 = true;
     s1->s  = true;
     s2->s  = true;
-    __builtin_clear_padding2(s2);
-    assert2(memcmp(s1, s2, sizeof(S)) == 0);
+    __builtin_clear_padding(s2);
+    assert(memcmp(s1, s2, sizeof(S)) == 0);
   }
 
   // bit fields
@@ -665,8 +642,8 @@ void structTests() {
     s2.b3 = 27;
     s1.b4 = 3;
     s2.b4 = 3;
-    __builtin_clear_padding2(&s2);
-    assert2(memcmp(&s1, &s2, sizeof(S)) == 0);
+    __builtin_clear_padding(&s2);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   testAllStructsForType<32, 16, char>(11, 22, 33, 44);
@@ -709,10 +686,10 @@ void unionTests() {
     u1.c = '4';
     u2.c = '4';
 
-    __builtin_clear_padding2(&u1); // should have no effect
-    assert2(u1.c == '4');
+    __builtin_clear_padding(&u1); // should have no effect
+    assert(u1.c == '4');
 
-    assert2(memcmp(&u1, &u2, sizeof(u)) == 0);
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
   }
 
   // tail padding of longest member
@@ -733,10 +710,10 @@ void unionTests() {
     u1.s1.c1 = '4';
     u2.s1.c1 = '4';
 
-    assert2(memcmp(&u1, &u2, sizeof(u)) != 0);
-    __builtin_clear_padding2(&u1);
-    assert2(u1.s1.c1 == '4');
-    assert2(memcmp(&u1, &u2, sizeof(u)) == 0);
+    assert(memcmp(&u1, &u2, sizeof(u)) != 0);
+    __builtin_clear_padding(&u1);
+    assert(u1.s1.c1 == '4');
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
   }
 }
 
@@ -746,10 +723,10 @@ void arrayTests() {
     int i1[2] = {1, 2};
     int i2[2] = {1, 2};
 
-    __builtin_clear_padding2(&i1);
-    assert2(i1[0] == 1);
-    assert2(i1[1] == 2);
-    assert2(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
+    __builtin_clear_padding(&i1);
+    assert(i1[0] == 1);
+    assert(i1[1] == 2);
+    assert(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
   }
 
   // long double
@@ -763,10 +740,10 @@ void arrayTests() {
     d2[0] = 3.0L;
     d2[1] = 4.0L;
 
-    __builtin_clear_padding2(&d1);
-    assert2(d1[0] == 3.0L);
-    assert2(d2[1] == 4.0L);
-    assert2(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
+    __builtin_clear_padding(&d1);
+    assert(d1[0] == 3.0L);
+    assert(d2[1] == 4.0L);
+    assert(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
   }
 
   // struct
@@ -800,18 +777,18 @@ void arrayTests() {
     s2[1].i2 = 4;
     s2[1].c2 = 'd';
 
-    assert2(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
-    __builtin_clear_padding2(&s1);
-
-    assert2(s1[0].i1 == 1);
-    assert2(s1[0].c1 == 'a');
-    assert2(s1[0].i2 == 2);
-    assert2(s1[0].c2 == 'b');
-    assert2(s1[1].i1 == 3);
-    assert2(s1[1].c1 == 'c');
-    assert2(s1[1].i2 == 4);
-    assert2(s1[1].c2 == 'd');
-    assert2(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+
+    assert(s1[0].i1 == 1);
+    assert(s1[0].c1 == 'a');
+    assert(s1[0].i2 == 2);
+    assert(s1[0].c2 == 'b');
+    assert(s1[1].i1 == 3);
+    assert(s1[1].c1 == 'c');
+    assert(s1[1].i2 == 4);
+    assert(s1[1].c2 == 'd');
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
   }
 }
 

>From df69830bc22d750a7c50851e85aa362783c1000b Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 18:54:22 +0000
Subject: [PATCH 10/22] remove Float3Vec test: clang writes to padding on
 assignment even on zero-initialised memory, making the test comparison hard

---
 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 8698cd26d2607..1b92e8e0f352c 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // UNSUPPORTED: c++03
 // UNSUPPORTED: gcc
-// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22
+// UNSUPPORTED: clang-19, clang-20, clang-21
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
 
@@ -254,7 +254,6 @@ struct Foo {
 };
 
 typedef float Float4Vec __attribute__((ext_vector_type(4)));
-typedef float Float3Vec __attribute__((ext_vector_type(3)));
 
 void primitiveTests() {
   // no padding
@@ -664,8 +663,6 @@ void structTests() {
   testAllStructsForType<64, 32, _BitInt(64)>(4, 5, 6, 7);
   testAllStructsForType<32, 16, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
   testAllStructsForType<64, 32, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
-  testAllStructsForType<256, 128, Float3Vec>(0, 1, 2, 3);
-  testAllStructsForType<128, 128, Float3Vec>(4, 5, 6, 7);
   testAllStructsForType<256, 128, Float4Vec>(0, 1, 2, 3);
   testAllStructsForType<128, 128, Float4Vec>(4, 5, 6, 7);
 

>From 5962daa78010049ad0b6a4f68313fa0aecc1b004 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 18:58:23 +0000
Subject: [PATCH 11/22] remove llvm::dbgs

---
 clang/lib/CodeGen/CGBuiltin.cpp | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6631bc5d4d5a1..b1a0ae3d49f9f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2728,15 +2728,6 @@ struct PaddingClearer {
     MergeOccuppiedIntervals();
     auto PaddingIntervals =
         GetPaddingIntervals(CGF.getContext().getTypeSize(Ty));
-    llvm::dbgs() << "Occuppied Bits:\n";
-    for (auto [first, last] : OccuppiedIntervals) {
-      llvm::dbgs() << "[" << first << ", " << last << ")\n";
-    }
-    llvm::dbgs() << "Padding Bits:\n";
-    for (auto [first, last] : PaddingIntervals) {
-      llvm::dbgs() << "[" << first << ", " << last << ")\n";
-    }
-
     for (const auto &Interval : PaddingIntervals) {
       ClearPadding(Ptr, Interval);
     }
@@ -2783,16 +2774,11 @@ struct PaddingClearer {
                        .getDataLayout()
                        .getTypeSizeInBits(Type)
                        .getKnownMinValue();
-    llvm::dbgs() << "clear_padding primitive type. adding Interval ["
-                 << D.StartBitOffset << ", " << D.StartBitOffset + SizeBit
-                 << ")\n";
     OccuppiedIntervals.push_back(
         BitInterval{D.StartBitOffset, D.StartBitOffset + SizeBit});
   }
 
   void VisitArray(const ConstantArrayType *AT, uint64_t StartBitOffset) {
-    llvm::dbgs() << "clear_padding visiting constant array starting from "
-                 << StartBitOffset << "\n";
     for (uint64_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
          ++ArrIndex) {
 
@@ -2809,17 +2795,10 @@ struct PaddingClearer {
 
   void VisitStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
                    bool VisitVirtualBase) {
-    llvm::dbgs() << "clear_padding visiting struct: "
-                 << R->getQualifiedNameAsString() << " starting from offset "
-                 << StartBitOffset << '\n';
     const auto &DL = CGF.CGM.getModule().getDataLayout();
 
     const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
     if (ASTLayout.hasOwnVFPtr()) {
-      llvm::dbgs()
-          << "clear_padding found vtable ptr. Adding occuppied interval ["
-          << StartBitOffset << ", "
-          << (StartBitOffset + DL.getPointerSizeInBits()) << ")\n";
       OccuppiedIntervals.push_back(BitInterval{
           StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
     }
@@ -2828,14 +2807,11 @@ struct PaddingClearer {
                                const CXXBaseSpecifier &Base, auto GetOffset) {
       auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
       if (!BaseRecord) {
-        llvm::dbgs() << "Base is not a CXXRecord!\n";
         return;
       }
       auto BaseOffset =
           std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
 
-      llvm::dbgs() << "visiting base at offset " << StartBitOffset << " + "
-                   << BaseOffset * CharWidth << '\n';
       Queue.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
                            Base.getType(), /*VisitVirtualBase*/ false});
     };
@@ -2854,12 +2830,7 @@ struct PaddingClearer {
 
     for (auto *Field : R->fields()) {
       auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
-      llvm::dbgs() << "visiting field at offset " << StartBitOffset << " + "
-                   << FieldOffset << '\n';
       if (Field->isBitField()) {
-        llvm::dbgs() << "clear_padding found bit field. Adding Interval ["
-                     << StartBitOffset + FieldOffset << " , "
-                     << FieldOffset + Field->getBitWidthValue() << ")\n";
         OccuppiedIntervals.push_back(BitInterval{
             StartBitOffset + FieldOffset,
             StartBitOffset + FieldOffset + Field->getBitWidthValue()});
@@ -2876,10 +2847,6 @@ struct PaddingClearer {
     auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
     auto ImgOffset = ElementSize.alignTo(ElementAlign);
 
-    llvm::dbgs() << "clear_padding visiting Complex Type. Real from "
-                 << StartBitOffset << "Img from "
-                 << StartBitOffset + ImgOffset.getQuantity() * CharWidth
-                 << "\n";
     Queue.push_back(
         Data{StartBitOffset, ElementQualType, /*VisitVirtualBase*/ true});
     Queue.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,

>From 38348b88e2f7e7532b0f696ace6bcb5bf7a73b9d Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 29 Nov 2025 19:13:25 +0000
Subject: [PATCH 12/22] disable clang-22

---
 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 1b92e8e0f352c..d2e0430a57694 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // UNSUPPORTED: c++03
 // UNSUPPORTED: gcc
-// UNSUPPORTED: clang-19, clang-20, clang-21
+// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
 

>From 2fe5de20eaa1af4a863196253deb9cfdf6cd5806 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 30 Nov 2025 08:02:58 +0000
Subject: [PATCH 13/22] disable libcxx test for apple-clang-17

---
 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index d2e0430a57694..8814613c44e7d 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // UNSUPPORTED: c++03
 // UNSUPPORTED: gcc
-// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22
+// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22, apple-clang-17
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
 

>From af1331782782af7023988effc82170c3244dc854 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 1 Mar 2026 09:37:52 +0000
Subject: [PATCH 14/22] in progress address review

more comments

more comments

more comments

trivially copyable

more comments
---
 clang/include/clang/Basic/Builtins.td         |   2 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   3 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  52 +++---
 clang/lib/Sema/SemaChecking.cpp               |  53 ++++--
 .../builtin-clear-padding-codegen.cpp         | 171 ++++++++++++------
 clang/test/Sema/builtin-clear-padding.c       |  18 ++
 clang/test/SemaCXX/builtin-clear-padding.cpp  |  73 +++++++-
 .../atomics/builtin_clear_padding.pass.cpp    | 160 +++++++++-------
 8 files changed, 368 insertions(+), 164 deletions(-)
 create mode 100644 clang/test/Sema/builtin-clear-padding.c

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index f2464add00acb..5a7c354cb9595 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1263,7 +1263,7 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
   let Prototype = "bool(void*)";
 }
 
-def ClearPadding : LangBuiltin<"CXX_LANG"> {
+def ClearPadding : Builtin {
   let Spellings = ["__builtin_clear_padding"];
   let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "void(void*)";
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 1302c4296885b..f8bf54e28994b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9680,6 +9680,9 @@ def warn_atomic_stripped_in_enum : Warning<
   "'_Atomic' qualifier ignored; operations involving the enumeration type will "
   "be non-atomic">,
   InGroup<DiagGroup<"underlying-atomic-qualifier-ignored">>, DefaultError;
+def err_clear_padding_needs_trivial_copy : Error<
+  "argument to __builtin_clear_padding must be a pointer to a "
+  "trivially-copyable type (%0 invalid)">;
 
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to %select{overflow builtin|checked integer operation}0 "
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b1a0ae3d49f9f..72b8e01af44f1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2704,7 +2704,7 @@ RValue CodeGenFunction::emitStdcFirstBit(const CallExpr *E, Intrinsic::ID IntID,
 namespace {
 
 // PaddingClearer is a utility class that clears padding bits in a
-// c++ type. It traverses the type recursively, collecting occupied
+// c/c++ type. It traverses the type recursively, collecting occupied
 // bit intervals, and then compute the padding intervals.
 // In the end, it clears the padding bits by writing zeros
 // to the padding intervals bytes-by-bytes. If a byte only contains
@@ -2716,12 +2716,12 @@ struct PaddingClearer {
 
   void run(Value *Ptr, QualType Ty) {
     OccuppiedIntervals.clear();
-    Queue.clear();
+    Stack.clear();
 
-    Queue.push_back(Data{0, Ty, true});
-    while (!Queue.empty()) {
-      auto Current = Queue.back();
-      Queue.pop_back();
+    Stack.push_back(Data{0, Ty, true});
+    while (!Stack.empty()) {
+      auto Current = Stack.back();
+      Stack.pop_back();
       Visit(Current);
     }
 
@@ -2746,7 +2746,7 @@ struct PaddingClearer {
     bool VisitVirtualBase;
   };
 
-  void Visit(Data const &D) {
+  void Visit(const Data &D) {
     if (auto *AT = dyn_cast<ConstantArrayType>(D.Ty)) {
       VisitArray(AT, D.StartBitOffset);
       return;
@@ -2760,7 +2760,7 @@ struct PaddingClearer {
     if (D.Ty->isAtomicType()) {
       auto Unwrapped = D;
       Unwrapped.Ty = D.Ty.getAtomicUnqualifiedType();
-      Queue.push_back(Unwrapped);
+      Stack.push_back(Unwrapped);
       return;
     }
 
@@ -2787,7 +2787,7 @@ struct PaddingClearer {
       auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
       auto Offset = ElementSize.alignTo(ElementAlign);
 
-      Queue.push_back(
+      Stack.push_back(
           Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
                ElementQualType, /*VisitVirtualBase*/ true});
     }
@@ -2812,7 +2812,7 @@ struct PaddingClearer {
       auto BaseOffset =
           std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
 
-      Queue.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
+      Stack.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
                            Base.getType(), /*VisitVirtualBase*/ false});
     };
 
@@ -2835,7 +2835,7 @@ struct PaddingClearer {
             StartBitOffset + FieldOffset,
             StartBitOffset + FieldOffset + Field->getBitWidthValue()});
       } else {
-        Queue.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
+        Stack.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
                              /*VisitVirtualBase*/ true});
       }
     }
@@ -2847,9 +2847,9 @@ struct PaddingClearer {
     auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
     auto ImgOffset = ElementSize.alignTo(ElementAlign);
 
-    Queue.push_back(
+    Stack.push_back(
         Data{StartBitOffset, ElementQualType, /*VisitVirtualBase*/ true});
-    Queue.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
+    Stack.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
                          ElementQualType, /*VisitVirtualBase*/ true});
   }
 
@@ -2860,7 +2860,7 @@ struct PaddingClearer {
                        std::tie(rhs.First, rhs.Last);
               });
 
-    std::vector<BitInterval> Merged;
+    llvm::SmallVector<BitInterval> Merged;
     Merged.reserve(OccuppiedIntervals.size());
 
     for (const BitInterval &NextInterval : OccuppiedIntervals) {
@@ -2880,8 +2880,8 @@ struct PaddingClearer {
     OccuppiedIntervals = Merged;
   }
 
-  std::vector<BitInterval> GetPaddingIntervals(uint64_t SizeInBits) const {
-    std::vector<BitInterval> Results;
+  llvm::SmallVector<BitInterval> GetPaddingIntervals(uint64_t SizeInBits) const {
+    llvm::SmallVector<BitInterval> Results;
     if (OccuppiedIntervals.size() == 1 &&
         OccuppiedIntervals.front().First == 0 &&
         OccuppiedIntervals.end()->Last == SizeInBits) {
@@ -2920,8 +2920,10 @@ struct PaddingClearer {
       auto *Value = CGF.Builder.CreateLoad(ElementAddr);
 
       // Create mask to clear bits within the byte
-      uint8_t mask = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
-      auto *MaskValue = ConstantInt::get(CGF.Int8Ty, mask);
+      // We want to clear bits from StartBit to EndBit-1
+      uint8_t bitsToClear = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
+      uint8_t bitsToKeep = ~bitsToClear;
+      auto *MaskValue = ConstantInt::get(CGF.Int8Ty, bitsToKeep);
       auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
 
       CGF.Builder.CreateStore(NewValue, ElementAddr);
@@ -2934,8 +2936,9 @@ struct PaddingClearer {
 
         auto *Value = CGF.Builder.CreateLoad(ElementAddr);
 
-        uint8_t startMask = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
-        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, ~startMask);
+        uint8_t bitsToClear = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
+        uint8_t bitsToKeep = ~bitsToClear;
+        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, bitsToKeep);
         auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
 
         CGF.Builder.CreateStore(NewValue, ElementAddr);
@@ -2959,8 +2962,9 @@ struct PaddingClearer {
 
         auto *Value = CGF.Builder.CreateLoad(ElementAddr);
 
-        uint8_t endMask = (1 << EndBit) - 1;
-        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, endMask);
+        uint8_t bitsToClear = (1 << EndBit) - 1;
+        uint8_t bitsToKeep = ~bitsToClear;
+        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, bitsToKeep);
         auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
 
         CGF.Builder.CreateStore(NewValue, ElementAddr);
@@ -2970,8 +2974,8 @@ struct PaddingClearer {
 
   CodeGenFunction &CGF;
   const uint64_t CharWidth;
-  std::deque<Data> Queue;
-  std::vector<BitInterval> OccuppiedIntervals;
+  llvm::SmallVector<Data> Stack;
+  llvm::SmallVector<BitInterval> OccuppiedIntervals;
 };
 
 } // namespace
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e175f06b5304c..df2ed87204255 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3125,17 +3125,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_trivially_relocate:
     return BuiltinTriviallyRelocate(*this, TheCall);
   case Builtin::BI__builtin_clear_padding: {
-    const auto numArgs = TheCall->getNumArgs();
-    if (numArgs < 1) {
-      Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
-          << 0 /*function call*/ << "T*" << 0;
-      return ExprError();
-    }
-    if (numArgs > 1) {
-      Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
-          << 0 /*function call*/ << "T*" << numArgs << 0;
+    if (checkArgCount(TheCall, 1))
       return ExprError();
-    }
 
     const Expr *PtrArg = TheCall->getArg(0);
     const QualType PtrArgType = PtrArg->getType();
@@ -3145,14 +3136,50 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
           << "pointer";
       return ExprError();
     }
-    if (PtrArgType->getPointeeType().isConstQualified()) {
+    QualType PointeeType = PtrArgType->getPointeeType();
+    if (PointeeType.isConstQualified()) {
       Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
-          << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+          << TheCall->getSourceRange() << 4 /*ConstUnknown*/;
       return ExprError();
     }
-    if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+    if (RequireCompleteType(PtrArg->getBeginLoc(), PointeeType,
                             diag::err_typecheck_decl_incomplete_type))
       return ExprError();
+
+    // For non-trivially-copyable types, we try to match GCC's behaviour,
+    // i.e. __builtin_clear_padding(&var) is OK as long as var is a complete
+    // object: either a local variable or a function parameter passed by value.
+    auto IsAddrOfDeclExpr = [&]() {
+      const Expr *IgnoreCastsAndParens = PtrArg->IgnoreCasts();
+      IgnoreCastsAndParens = IgnoreCastsAndParens->IgnoreParens();
+      const auto *UnaryOp = dyn_cast<UnaryOperator>(IgnoreCastsAndParens);
+      if (!UnaryOp || UnaryOp->getOpcode() != UO_AddrOf)
+        return false;
+
+      const Expr *Operand = UnaryOp->getSubExpr()->IgnoreParens();
+      const auto *DeclRef = dyn_cast<DeclRefExpr>(Operand);
+      if (!DeclRef)
+        return false;
+
+      const auto *VarDecl = dyn_cast<::clang::VarDecl>(DeclRef->getDecl());
+      if (!VarDecl || VarDecl->getType()->isReferenceType())
+        return false;
+
+      // matching GCC behaviour
+      // __builtin_clear_padding((X*)&var) is fine as long as X is the type of var
+      QualType VarQType = VarDecl->getType();
+      return PointeeType.getTypePtr() == VarQType.getTypePtr() ||
+             Context.hasSameUnqualifiedType(PointeeType, VarQType);
+    };
+
+    if (!PointeeType.isTriviallyCopyableType(Context) &&
+        !PointeeType->isAtomicType() // _Atomic is not copyable
+        && !IsAddrOfDeclExpr()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_clear_padding_needs_trivial_copy)
+          << PtrArg->getType() << PtrArg->getSourceRange();
+      return ExprError();
+    }
+
     break;
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index b9ce468b5aa64..5cf9f0e986261 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -458,21 +458,20 @@ struct S7 : VirtualBase, NonVirtualBase {
   bool z;
 };
 
-// CHECK-LABEL: define dso_local void @_Z10testVtableP2S7(
+// CHECK-LABEL: define dso_local void @_Z10testVtable2S7(
 // CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // CHECK-NEXT:    ret void
 //
-void testVtable(S7 *s) {
+void testVtable(S7 s) {
   // "vtable ptr" [0-7], "x" [8-11], "y" [12], "z" [13], PAD [14-15]
-  __builtin_clear_padding(s);
+  __builtin_clear_padding(&s);
 }
 
 struct VirtualBase1 {
@@ -501,39 +500,38 @@ struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 {
   bool z;
 };
 
-// CHECK-LABEL: define dso_local void @_Z23testMultipleBasesVtableP2S8(
+// CHECK-LABEL: define dso_local void @_Z23testMultipleBasesVtable2S8(
 // CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
 // CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
 // CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
 // CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
 // CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
 // CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
-// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
-// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
 // CHECK-NEXT:    ret void
 //
-void testMultipleBasesVtable(S8 *s) {
+void testMultipleBasesVtable(S8 s) {
   // "vtable ptr" [0-7], "x1" [8-11], PAD "[12-15]",
   // "vtable ptr" [16-23], "x2" [24-27], "y" [28], PAD "[29-31]",
   // "vtable_ptr" [32-39], "x3" [40-43], "z" [44], PAD [45-47]
-  __builtin_clear_padding(s);
+  __builtin_clear_padding(&s);
 }
 
 struct VirtualChain1 {
@@ -562,24 +560,23 @@ struct S9 : NonVirtualBase2, VirtualChain3 {
   bool z;
 };
 
-// CHECK-LABEL: define dso_local void @_Z16testVirtualChainP2S9(
+// CHECK-LABEL: define dso_local void @_Z16testVirtualChain2S9(
 // CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // CHECK-NEXT:    ret void
 //
-void testVirtualChain(S9 *s) {
+void testVirtualChain(S9 s) {
   // This should clear the padding after the bool z.
   // base reordered
   // "vtable ptr" [0-7],  "x1" [8-11], x2 [12-15], x3 [16-19],
   // y [20], z [21], PAD [22-23]
-  __builtin_clear_padding(s);
+  __builtin_clear_padding(&s);
 }
 
 
@@ -600,38 +597,37 @@ struct S10 : D1, D2 {
   bool s;
 };
 
-// CHECK-LABEL: define dso_local void @_Z22testVirtualInheritanceP3S10(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define dso_local void @_Z22testVirtualInheritance3S10(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
 // CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
 // CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
 // CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
 // CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
 // CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
 // CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
-// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // CHECK-NEXT:    ret void
 //
-void testVirtualInheritance(S10 *s) {
+void testVirtualInheritance(S10 s) {
   // note derived member placed before the virtual base
   // "vtable ptr" [0-7],  "d1" [8-11], "b1" [12], PAD [13-15],
   // "vtable ptr" [16-23],  "d2" [24-27], "b2" [28], s [29],  PAD [30-31],
   // "x" [32-35], PAD [36-39]
-  __builtin_clear_padding(s);
+  __builtin_clear_padding(&s);
 }
 
 struct S11 {
@@ -715,7 +711,7 @@ void testArrayLongDouble(long double (&arr)[2]) {
 // CHECK-LABEL: define dso_local void @_Z17testArrayOfStructv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S:%.*]] = alloca [2 x %struct.S.0], align 16
+// CHECK-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_0:%.*]]], align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
 // CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
@@ -786,3 +782,72 @@ struct ArrOfStructsWithPadding {
 void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
   __builtin_clear_padding(arr);
 }
+
+template <class T>
+struct S12 {
+  T t;
+  char c;
+};
+
+// CHECK-LABEL: define dso_local void @_Z18testTemplateStructP3S12IiE(
+// CHECK-SAME: ptr noundef [[S12:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S12_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[S12]], ptr [[S12_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S12_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    ret void
+//
+void testTemplateStruct(S12<int>* s12) {
+  __builtin_clear_padding(s12);
+}
+
+// CHECK-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3S12IiE(
+// CHECK-SAME: ptr noundef [[AS12:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[AS12_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[AS12]], ptr [[AS12_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AS12_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// CHECK-NEXT:    ret void
+//
+void testAtomic(_Atomic(S12<int>)* as12) {
+  __builtin_clear_padding(as12);
+}
+
+
+struct NonTriviallyCopyable {
+  int i;
+  char c;
+
+  NonTriviallyCopyable(){}
+  NonTriviallyCopyable(const NonTriviallyCopyable&) {}
+  ~NonTriviallyCopyable() {}
+};
+
+// CHECK-LABEL: define dso_local void @_Z24testNonTriviallyCopyable20NonTriviallyCopyable(
+// CHECK-SAME: ptr noundef [[NTC:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[NTC_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[NTC]], ptr [[NTC_INDIRECT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
+// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
+// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
+// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// CHECK-NEXT:    ret void
+//
+void testNonTriviallyCopyable(NonTriviallyCopyable ntc) {
+  __builtin_clear_padding(&ntc);
+}
diff --git a/clang/test/Sema/builtin-clear-padding.c b/clang/test/Sema/builtin-clear-padding.c
new file mode 100644
index 0000000000000..4ec22fd5d5c70
--- /dev/null
+++ b/clang/test/Sema/builtin-clear-padding.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct Foo {};
+
+struct Incomplete; // expected-note {{forward declaration of 'struct Incomplete'}}
+
+void test(int a, struct Foo b, void *c, int *d, struct Foo *e, const struct Foo *f, struct Incomplete *g) {
+  __builtin_clear_padding(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+  __builtin_clear_padding(d, d); // expected-error {{too many arguments to function call, expected 1, have 2}}
+
+  __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
+  __builtin_clear_padding(b); // expected-error {{passing 'struct Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('struct Foo' vs pointer)}}
+  __builtin_clear_padding(c); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(d); // This should not error.
+  __builtin_clear_padding(e); // This should not error.
+  __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
+  __builtin_clear_padding(g); // expected-error {{variable has incomplete type 'struct Incomplete'}}
+}
diff --git a/clang/test/SemaCXX/builtin-clear-padding.cpp b/clang/test/SemaCXX/builtin-clear-padding.cpp
index ea87249c87b0a..c3546f5d0e33d 100644
--- a/clang/test/SemaCXX/builtin-clear-padding.cpp
+++ b/clang/test/SemaCXX/builtin-clear-padding.cpp
@@ -2,14 +2,77 @@
 
 struct Foo {};
 
-struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
-
-void test(int a, Foo b, void *c, int *d, Foo *e, const Foo *f, Incomplete *g) {
+void test(int a, Foo b, int *d, Foo *e, const Foo *f) {
   __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
   __builtin_clear_padding(b); // expected-error {{passing 'Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('Foo' vs pointer)}}
-  __builtin_clear_padding(c); // expected-error {{variable has incomplete type 'void'}}
   __builtin_clear_padding(d); // This should not error.
   __builtin_clear_padding(e); // This should not error.
   __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
-  __builtin_clear_padding(g); // expected-error {{variable has incomplete type 'Incomplete'}}
+}
+
+struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
+
+void testIncomplete(void* v, Incomplete *i) {
+  __builtin_clear_padding(v); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(i); // expected-error {{variable has incomplete type 'Incomplete'}}
+}
+
+void testNumArgs(int* i) {
+  __builtin_clear_padding(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+  __builtin_clear_padding(i); // This should not error.
+  __builtin_clear_padding(i, i); // expected-error {{too many arguments to function call, expected 1, have 2}}
+  __builtin_clear_padding(i, i, i); // expected-error {{too many arguments to function call, expected 1, have 3}}
+  __builtin_clear_padding(i, i, i, i); // expected-error {{too many arguments to function call, expected 1, have 4}}
+}
+
+struct NonTriviallyCopyable {
+  NonTriviallyCopyable() {}
+  NonTriviallyCopyable(const NonTriviallyCopyable&){}
+};
+
+struct DerivedNonTriviallyCopyable : NonTriviallyCopyable {};
+
+void testNonTriviallyCopyable(NonTriviallyCopyable& ntc0, NonTriviallyCopyable ntc1, DerivedNonTriviallyCopyable& dntc0, DerivedNonTriviallyCopyable dntc1) {
+  NonTriviallyCopyable ntc2;
+  NonTriviallyCopyable& ntc3 = ntc0;
+  DerivedNonTriviallyCopyable dntc2;
+  DerivedNonTriviallyCopyable& dntc3 = dntc0;
+
+  __builtin_clear_padding(&ntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding(&ntc1);
+  __builtin_clear_padding(&ntc2);
+  __builtin_clear_padding(&ntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding(&dntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding(&dntc1);
+  __builtin_clear_padding(&dntc2);
+  __builtin_clear_padding(&dntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc1);
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc2);
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc1);
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc2);
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc1); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc2); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+}
+
+struct Bar {
+  Foo *foo;
+};
+
+void testMemberPointer(Foo* Bar::*mp) {
+  __builtin_clear_padding(mp); // expected-error {{passing 'Foo *Bar::*' to parameter of incompatible type pointer: type mismatch at 1st parameter ('Foo *Bar::*' vs pointer)}}
+}
+
+
+void testFunctionPointer(void(*f)()) {
+  __builtin_clear_padding(f); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('void (*)()' invalid)}}
 }
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 8814613c44e7d..fad2b0981e487 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -454,21 +454,27 @@ void structTests() {
     memset(buff1, 0, sizeof(S));
     memset(buff2, 42, sizeof(S));
 
-    S* s1 = new (&buff1) S;
-    S* s2 = new (&buff2) S;
-
-    s1->x = 0xFFFFFFFF;
-    s2->x = 0xFFFFFFFF;
-    s1->y = 'a';
-    s2->y = 'a';
-    s1->z = true;
-    s2->z = true;
-    __builtin_clear_padding(s2);
-    assert(s2->x == 0xFFFFFFFF);
-    assert(s2->y == 'a');
-    assert(s2->z == true);
-    assert(s2->call() == 5);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x = 0xFFFFFFFF;
+    s2.x = 0xFFFFFFFF;
+    s1.y = 'a';
+    s2.y = 'a';
+    s1.z = true;
+    s2.z = true;
+    __builtin_clear_padding(&s2);
+    assert(s2.x == 0xFFFFFFFF);
+    assert(s2.y == 'a');
+    assert(s2.z == true);
+    assert(s2.call() == 5);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // multiple bases with vtable
@@ -505,27 +511,33 @@ void structTests() {
     memset(buff1, 0, sizeof(S));
     memset(buff2, 42, sizeof(S));
 
-    S* s1 = new (&buff1) S;
-    S* s2 = new (&buff2) S;
-
-    s1->x1 = 0xFFFFFFFF;
-    s2->x1 = 0xFFFFFFFF;
-    s1->x2 = 0xFAFAFAFA;
-    s2->x2 = 0xFAFAFAFA;
-    s1->x3 = 0xAAAAAAAA;
-    s2->x3 = 0xAAAAAAAA;
-    s1->y  = 'a';
-    s2->y  = 'a';
-    s1->z  = true;
-    s2->z  = true;
-    __builtin_clear_padding(s2);
-    assert(s2->x1 == 0xFFFFFFFF);
-    assert(s2->x2 == 0xFAFAFAFA);
-    assert(s2->x3 == 0xAAAAAAAA);
-    assert(s2->y == 'a');
-    assert(s2->z == true);
-    assert(s2->call1() == 5);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x1 = 0xFFFFFFFF;
+    s2.x1 = 0xFFFFFFFF;
+    s1.x2 = 0xFAFAFAFA;
+    s2.x2 = 0xFAFAFAFA;
+    s1.x3 = 0xAAAAAAAA;
+    s2.x3 = 0xAAAAAAAA;
+    s1.y  = 'a';
+    s2.y  = 'a';
+    s1.z  = true;
+    s2.z  = true;
+    __builtin_clear_padding(&s2);
+    assert(s2.x1 == 0xFFFFFFFF);
+    assert(s2.x2 == 0xFAFAFAFA);
+    assert(s2.x3 == 0xAAAAAAAA);
+    assert(s2.y == 'a');
+    assert(s2.z == true);
+    assert(s2.call1() == 5);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // chain of bases with virtual functions
@@ -561,21 +573,27 @@ void structTests() {
     char buff2[sizeof(S)];
     memset(buff1, 0, sizeof(S));
     memset(buff2, 42, sizeof(S));
-    S* s1 = new (&buff1) S;
-    S* s2 = new (&buff2) S;
-
-    s1->x1 = 0xFFFFFFFF;
-    s2->x1 = 0xFFFFFFFF;
-    s1->x2 = 0xFAFAFAFA;
-    s2->x2 = 0xFAFAFAFA;
-    s1->x3 = 0xAAAAAAAA;
-    s2->x3 = 0xAAAAAAAA;
-    s1->y  = 'a';
-    s2->y  = 'a';
-    s1->z  = true;
-    s2->z  = true;
-    __builtin_clear_padding(s2);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x1 = 0xFFFFFFFF;
+    s2.x1 = 0xFFFFFFFF;
+    s1.x2 = 0xFAFAFAFA;
+    s2.x2 = 0xFAFAFAFA;
+    s1.x3 = 0xAAAAAAAA;
+    s2.x3 = 0xAAAAAAAA;
+    s1.y  = 'a';
+    s2.y  = 'a';
+    s1.z  = true;
+    s2.z  = true;
+    __builtin_clear_padding(&s2);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // virtual inheritance
@@ -600,23 +618,29 @@ void structTests() {
     char buff2[sizeof(S)];
     memset(buff1, 0, sizeof(S));
     memset(buff2, 42, sizeof(S));
-    S* s1 = new (&buff1) S;
-    S* s2 = new (&buff2) S;
-
-    s1->x  = 0xFFFFFFFF;
-    s2->x  = 0xFFFFFFFF;
-    s1->d1 = 0xFAFAFAFA;
-    s2->d1 = 0xFAFAFAFA;
-    s1->d2 = 0xAAAAAAAA;
-    s2->d2 = 0xAAAAAAAA;
-    s1->b1 = true;
-    s2->b1 = true;
-    s1->b2 = true;
-    s2->b2 = true;
-    s1->s  = true;
-    s2->s  = true;
-    __builtin_clear_padding(s2);
-    assert(memcmp(s1, s2, sizeof(S)) == 0);
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x  = 0xFFFFFFFF;
+    s2.x  = 0xFFFFFFFF;
+    s1.d1 = 0xFAFAFAFA;
+    s2.d1 = 0xFAFAFAFA;
+    s1.d2 = 0xAAAAAAAA;
+    s2.d2 = 0xAAAAAAAA;
+    s1.b1 = true;
+    s2.b1 = true;
+    s1.b2 = true;
+    s2.b2 = true;
+    s1.s  = true;
+    s2.s  = true;
+    __builtin_clear_padding(&s2);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
   }
 
   // bit fields

>From 74f58940bdc6f284c3d8b0b2200ac7978b1ed82b Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 1 Mar 2026 20:11:16 +0000
Subject: [PATCH 15/22] windows

---
 .../builtin-clear-padding-codegen.cpp         | 1406 +++++++++++------
 1 file changed, 946 insertions(+), 460 deletions(-)

diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index 5cf9f0e986261..bb121fba9db17 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -1,18 +1,29 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=WINDOWS
 
 
 struct Empty {};
 
-// CHECK-LABEL: define dso_local void @_Z9testEmptyP5Empty(
-// CHECK-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z9testEmptyP5Empty(
+// LINUX-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z9testEmptyP5Empty(
+// WINDOWS-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testEmpty(Empty *e) {
   // This should clear the one byte that Empty occupies.
@@ -20,13 +31,21 @@ void testEmpty(Empty *e) {
 }
 
 
-// CHECK-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
-// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
+// LINUX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
+// WINDOWS-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
 //
 void testPrimitiveNoPadding(int *i) {
   // This should not clear any padding, since int has no padding.
@@ -34,25 +53,45 @@ void testPrimitiveNoPadding(int *i) {
 }
 
 
-// CHECK-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
-// CHECK-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
+// LINUX-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
+// WINDOWS-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testPrimitiveLongDouble(long double *ld) {
   // padding [10, 15] on x86
@@ -60,37 +99,69 @@ void testPrimitiveLongDouble(long double *ld) {
 }
 
 
-// CHECK-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
-// CHECK-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
-// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
-// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
-// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
-// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
-// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
-// CHECK-NEXT:    store i8 0, ptr [[TMP12]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
+// LINUX-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
+// WINDOWS-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testPrimitiveComplexLongDouble(_Complex long double *c) {
   // padding [10, 15] and [26, 31] on x86
@@ -102,13 +173,21 @@ union U1 {
   char c;
 };
 
-// CHECK-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
-// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
 //
 void testUnionDifferentLength(U1 *u) {
   // This should not clear the object representation bits of the non-active member.
@@ -125,27 +204,49 @@ union U2 {
   char c2;
 };
 
-// CHECK-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
-// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testUnionTailPaddingOfLongestMember(U2 *u) {
   // This should clear the tail padding of the longest member.
@@ -175,54 +276,98 @@ struct alignas(4) Baz : Foo {
 // %struct.Foo = type { i8, i8, i8, i8 }
 // %struct.Bar = type { i8, i8, i8, i8 }
 
-// CHECK-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
-// CHECK-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testStructPaddingInBetweenMembers(Baz *baz) {
   // this should clear all the padding in between various members
   __builtin_clear_padding(baz);
 }
 
-// CHECK-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
-// CHECK-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testStructVolatile(volatile Baz *baz) {
   // this should clear all the padding in between various members
@@ -240,21 +385,37 @@ struct UnsizedTail {
 // "size", PAD_1, PAD_2, PAD_3, PAD_4
 // %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
 
-// CHECK-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
-// CHECK-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testStructUnsizedTail(UnsizedTail *u) {
   __builtin_clear_padding(u);
@@ -270,17 +431,29 @@ class S2 {
   bool b;
 };
 
-// CHECK-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testNoUniqueAddress(S2 *s) {
   // "x [0-3]",  "c" , "b", PAD [6-7]
@@ -292,55 +465,105 @@ struct S3 {
   bool b;
 };
 
-// CHECK-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
-// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
-// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
-// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
-// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
-// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
-// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
-// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
-// CHECK-NEXT:    store i8 0, ptr [[TMP12]], align 1
-// CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
-// CHECK-NEXT:    store i8 0, ptr [[TMP13]], align 1
-// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
-// CHECK-NEXT:    store i8 0, ptr [[TMP14]], align 1
-// CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
-// CHECK-NEXT:    store i8 0, ptr [[TMP15]], align 1
-// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// CHECK-NEXT:    store i8 0, ptr [[TMP16]], align 1
-// CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
-// CHECK-NEXT:    store i8 0, ptr [[TMP17]], align 1
-// CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// CHECK-NEXT:    store i8 0, ptr [[TMP18]], align 1
-// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
-// CHECK-NEXT:    store i8 0, ptr [[TMP19]], align 1
-// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// CHECK-NEXT:    store i8 0, ptr [[TMP20]], align 1
-// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
-// CHECK-NEXT:    store i8 0, ptr [[TMP21]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testStructWithLongDouble(S3 *s) {
   // "long double data[0-9]", PAD [10-15], "b", PAD [17-31]
@@ -354,19 +577,33 @@ struct S4 : Empty, B {
   bool b;
 };
 
-// CHECK-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testStructWithEmptyBase(S4 *s) {
   // "i" [0-3], "b" [4], PAD [5-7]
@@ -382,25 +619,45 @@ struct B2 {
 struct S5 : B1, B2 {
 };
 
-// CHECK-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testPaddingBetweenBases(S5 *s) {
   // "c1" [0], PAD [1-3] , "c2" [4], PAD [5-7]
@@ -419,23 +676,41 @@ struct S6 : B3, B4 {
   alignas(4) char c3;
 };
 
-// CHECK-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testPaddingAfterLastBase(S6 *s) {
   // "c1"[0], "c2"[1], PAD [2-3], "c3" [4], PAD [5-7]
@@ -458,16 +733,27 @@ struct S7 : VirtualBase, NonVirtualBase {
   bool z;
 };
 
-// CHECK-LABEL: define dso_local void @_Z10testVtable2S7(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z10testVtable2S7(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z10testVtable2S7(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testVtable(S7 s) {
   // "vtable ptr" [0-7], "x" [8-11], "y" [12], "z" [13], PAD [14-15]
@@ -500,32 +786,59 @@ struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 {
   bool z;
 };
 
-// CHECK-LABEL: define dso_local void @_Z23testMultipleBasesVtable2S8(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
-// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
-// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z23testMultipleBasesVtable2S8(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testMultipleBasesVtable2S8(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testMultipleBasesVtable(S8 s) {
   // "vtable ptr" [0-7], "x1" [8-11], PAD "[12-15]",
@@ -560,16 +873,27 @@ struct S9 : NonVirtualBase2, VirtualChain3 {
   bool z;
 };
 
-// CHECK-LABEL: define dso_local void @_Z16testVirtualChain2S9(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z16testVirtualChain2S9(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z16testVirtualChain2S9(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testVirtualChain(S9 s) {
   // This should clear the padding after the bool z.
@@ -597,30 +921,55 @@ struct S10 : D1, D2 {
   bool s;
 };
 
-// CHECK-LABEL: define dso_local void @_Z22testVirtualInheritance3S10(
-// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
-// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z22testVirtualInheritance3S10(
+// LINUX-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z22testVirtualInheritance3S10(
+// WINDOWS-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testVirtualInheritance(S10 s) {
   // note derived member placed before the virtual base
@@ -638,17 +987,29 @@ struct S11 {
   unsigned char b4 : 2; // 2 bits for b4 - next (and final) bits in the 2nd byte
 };
 
-// CHECK-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
-// CHECK-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
-// CHECK-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testBitFields(S11 *s) {
   // "b1" [0-2], "b2" [3-4], PAD [5-7], "b3" [8-13], "b4" [14-15]
@@ -657,11 +1018,17 @@ void testBitFields(S11 *s) {
 }
 
 
-// CHECK-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// WINDOWS-NEXT:    ret void
 //
 void testArrayNoPadding() {
   int i[4];
@@ -669,37 +1036,69 @@ void testArrayNoPadding() {
   __builtin_clear_padding(&i);
 }
 
-// CHECK-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
-// CHECK-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
-// CHECK-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
-// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
-// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
-// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
-// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
-// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
-// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
+// LINUX-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// LINUX-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
+// WINDOWS-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// WINDOWS-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testArrayLongDouble(long double (&arr)[2]) {
   // long double 0, [0-9] PAD [10-15]
@@ -708,35 +1107,65 @@ void testArrayLongDouble(long double (&arr)[2]) {
   __builtin_clear_padding(&ld);
 }
 
-// CHECK-LABEL: define dso_local void @_Z17testArrayOfStructv(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_0:%.*]]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
-// CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
-// CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// CHECK-NEXT:    store i8 0, ptr [[TMP7]], align 1
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
-// CHECK-NEXT:    store i8 0, ptr [[TMP8]], align 1
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
-// CHECK-NEXT:    store i8 0, ptr [[TMP9]], align 1
-// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// CHECK-NEXT:    store i8 0, ptr [[TMP10]], align 1
-// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
-// CHECK-NEXT:    store i8 0, ptr [[TMP11]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z17testArrayOfStructv(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_0:%.*]]], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z17testArrayOfStructv(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_0:%.*]]], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testArrayOfStruct() {
   struct S {
@@ -763,21 +1192,37 @@ struct ArrOfStructsWithPadding {
 // "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
 // %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
 
-// CHECK-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
-// CHECK-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
+// LINUX-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
+// WINDOWS-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
   __builtin_clear_padding(arr);
@@ -789,37 +1234,65 @@ struct S12 {
   char c;
 };
 
-// CHECK-LABEL: define dso_local void @_Z18testTemplateStructP3S12IiE(
-// CHECK-SAME: ptr noundef [[S12:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[S12_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[S12]], ptr [[S12_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S12_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z18testTemplateStructP3S12IiE(
+// LINUX-SAME: ptr noundef [[S12:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S12_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S12]], ptr [[S12_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S12_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testTemplateStructP3S12IiE(
+// WINDOWS-SAME: ptr noundef [[S12:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S12_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S12]], ptr [[S12_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S12_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testTemplateStruct(S12<int>* s12) {
   __builtin_clear_padding(s12);
 }
 
-// CHECK-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3S12IiE(
-// CHECK-SAME: ptr noundef [[AS12:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[AS12_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[AS12]], ptr [[AS12_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AS12_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3S12IiE(
+// LINUX-SAME: ptr noundef [[AS12:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[AS12_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[AS12]], ptr [[AS12_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AS12_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3S12IiE(
+// WINDOWS-SAME: ptr noundef [[AS12:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[AS12_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[AS12]], ptr [[AS12_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AS12_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testAtomic(_Atomic(S12<int>)* as12) {
   __builtin_clear_padding(as12);
@@ -835,18 +1308,31 @@ struct NonTriviallyCopyable {
   ~NonTriviallyCopyable() {}
 };
 
-// CHECK-LABEL: define dso_local void @_Z24testNonTriviallyCopyable20NonTriviallyCopyable(
-// CHECK-SAME: ptr noundef [[NTC:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[NTC_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    store ptr [[NTC]], ptr [[NTC_INDIRECT_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
-// CHECK-NEXT:    store i8 0, ptr [[TMP0]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
-// CHECK-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
-// CHECK-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// CHECK-NEXT:    ret void
+// LINUX-LABEL: define dso_local void @_Z24testNonTriviallyCopyable20NonTriviallyCopyable(
+// LINUX-SAME: ptr noundef [[NTC:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[NTC_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[NTC]], ptr [[NTC_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testNonTriviallyCopyable20NonTriviallyCopyable(
+// WINDOWS-SAME: ptr noundef [[NTC:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[NTC_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[NTC]], ptr [[NTC_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
 //
 void testNonTriviallyCopyable(NonTriviallyCopyable ntc) {
   __builtin_clear_padding(&ntc);

>From 24c5c95d4363c23bad5dc0e3af4693d2e043e54e Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 1 Mar 2026 21:40:46 +0000
Subject: [PATCH 16/22] c test

---
 clang/lib/CodeGen/CGBuiltin.cpp               |  25 +-
 .../CodeGen/builtin-clear-padding-codegen.c   | 756 ++++++++++++++++++
 .../builtin-clear-padding-codegen.cpp         |  50 +-
 clang/test/Sema/builtin-clear-padding.c       |  28 +-
 4 files changed, 820 insertions(+), 39 deletions(-)
 create mode 100644 clang/test/CodeGen/builtin-clear-padding-codegen.c

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 72b8e01af44f1..f6104cbe2a7df 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2753,7 +2753,12 @@ struct PaddingClearer {
     }
 
     if (auto *Record = D.Ty->getAsCXXRecordDecl()) {
-      VisitStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
+      VisitCXXStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
+      return;
+    }
+
+    if (auto *Record = D.Ty->getAsRecordDecl()) {
+      VisitCStruct(Record, D.StartBitOffset);
       return;
     }
 
@@ -2793,7 +2798,7 @@ struct PaddingClearer {
     }
   }
 
-  void VisitStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
+  void VisitCXXStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
                    bool VisitVirtualBase) {
     const auto &DL = CGF.CGM.getModule().getDataLayout();
 
@@ -2841,6 +2846,22 @@ struct PaddingClearer {
     }
   }
 
+  void VisitCStruct(const RecordDecl *R, uint64_t StartBitOffset) {
+    const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+
+    for (auto *Field : R->fields()) {
+      auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
+      if (Field->isBitField()) { // occupied bits: [offset, offset + width)
+        OccuppiedIntervals.push_back(BitInterval{
+            StartBitOffset + FieldOffset,
+            StartBitOffset + FieldOffset + Field->getBitWidthValue()});
+      } else { // queue the field's type and bit offset on Stack
+        Stack.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
+                             /*VisitVirtualBase=*/ false});
+      }
+    }
+  }
+
   void VisitComplex(const ComplexType *CT, uint64_t StartBitOffset) {
     QualType ElementQualType = CT->getElementType();
     auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
diff --git a/clang/test/CodeGen/builtin-clear-padding-codegen.c b/clang/test/CodeGen/builtin-clear-padding-codegen.c
new file mode 100644
index 0000000000000..84a476034cda0
--- /dev/null
+++ b/clang/test/CodeGen/builtin-clear-padding-codegen.c
@@ -0,0 +1,756 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c11 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c11 -triple=x86_64-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=WINDOWS
+
+struct Empty {};
+
+// LINUX-LABEL: define dso_local void @testEmpty(
+// LINUX-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testEmpty(
+// WINDOWS-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testEmpty(struct Empty *e) {
+  // Empty has size 0 in C (GNU extension), so there is no byte to clear.
+  __builtin_clear_padding(e);
+}
+
+
+// LINUX-LABEL: define dso_local void @testPrimitiveNoPadding(
+// LINUX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testPrimitiveNoPadding(
+// WINDOWS-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveNoPadding(int *i) {
+  // int has no padding bits, so the expected IR contains no store.
+  __builtin_clear_padding(i);
+}
+
+
+// LINUX-LABEL: define dso_local void @testPrimitiveLongDouble(
+// LINUX-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testPrimitiveLongDouble(
+// WINDOWS-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveLongDouble(long double *ld) {
+  // x86_fp80 keeps its value in bytes [0, 9]; bytes [10, 15] are padding.
+  __builtin_clear_padding(ld);
+}
+
+
+// LINUX-LABEL: define dso_local void @testPrimitiveComplexLongDouble(
+// LINUX-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testPrimitiveComplexLongDouble(
+// WINDOWS-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveComplexLongDouble(_Complex long double *c) {
+  // two long doubles back to back: padding [10, 15] and [26, 31] on x86
+  __builtin_clear_padding(c);
+}
+
+union U1 {
+  int i;
+  char c;
+};
+
+// LINUX-LABEL: define dso_local void @testUnionDifferentLength(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testUnionDifferentLength(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testUnionDifferentLength(union U1 *u) {
+  // Bytes [1, 3] hold `i`, so they are not padding even if `c` is active.
+  __builtin_clear_padding(u);
+}
+
+struct S {
+  __attribute__((aligned(8))) char c1;
+};
+
+union U2 {
+  struct S s1;
+  char c2;
+};
+
+// LINUX-LABEL: define dso_local void @testUnionTailPaddingOfLongestMember(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testUnionTailPaddingOfLongestMember(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testUnionTailPaddingOfLongestMember(union U2 *u) {
+  // s1 is the longest member (8 bytes due to aligned(8)); only byte 0 is data.
+  // Its tail padding, bytes [1, 7], is cleared.
+  __builtin_clear_padding(u);
+}
+
+
+struct __attribute__((aligned(4))) Foo {
+  char a;
+  _Alignas(2) char b;
+};
+
+struct __attribute__((aligned(4))) Bar {
+  char c;
+  _Alignas(2) char d;
+};
+
+struct __attribute__((aligned(4))) Baz {
+  struct Foo foo;
+  char e;
+  struct Bar bar;
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// LINUX-LABEL: define dso_local void @testStructPaddingInBetweenMembers(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testStructPaddingInBetweenMembers(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructPaddingInBetweenMembers(struct Baz *baz) {
+  // Clears PAD_1..PAD_7 of Baz, i.e. bytes 1, 3, 5, 6, 7, 9 and 11.
+  __builtin_clear_padding(baz);
+}
+
+// LINUX-LABEL: define dso_local void @testStructVolatile(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testStructVolatile(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructVolatile(volatile struct Baz *baz) {
+  // volatile pointee: same bytes cleared; expected stores are not volatile.
+  __builtin_clear_padding(baz);
+}
+
+
+
+
+struct S3 {
+  long double l;
+  _Bool b;
+};
+
+// LINUX-LABEL: define dso_local void @testStructWithLongDouble(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testStructWithLongDouble(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructWithLongDouble(struct S3 *s) {
+  // "l" [0-9], PAD [10-15], "b" [16], PAD [17-31]
+  __builtin_clear_padding(s);
+}
+
+struct S11 {
+  // occupies 2 bytes on the targets tested here:
+  unsigned char b1 : 3; // first 3 bits of the 1st byte
+  unsigned char b2 : 2; // next 2 bits of the 1st byte; its remaining 3 bits are padding
+  unsigned char b3 : 6; // 6 bits - does not fit into the 1st byte, so it starts the 2nd
+  unsigned char b4 : 2; // last 2 bits of the 2nd byte
+};
+
+// LINUX-LABEL: define dso_local void @testBitFields(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testBitFields(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitFields(struct S11 *s) {
+  // "b1" [0-2], "b2" [3-4], PAD [5-7], "b3" [8-13], "b4" [14-15]
+  // only byte 0 has padding: AND it with 0b00011111 (31) to clear bits 5-7
+  __builtin_clear_padding(s);
+}
+
+
+// LINUX-LABEL: define dso_local void @testArrayNoPadding(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrayNoPadding(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// WINDOWS-NEXT:    ret void
+//
+void testArrayNoPadding(void) {
+  int i[4];
+  // int elements are laid out back to back, so the array has no padding.
+  __builtin_clear_padding(&i);
+}
+
+// LINUX-LABEL: define dso_local void @testArrayLongDouble(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrayLongDouble(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrayLongDouble(void) {
+  // element 0: value bytes [0-9], PAD [10-15]
+  // element 1: value bytes [16-25], PAD [26-31]
+  long double ld[2];
+  __builtin_clear_padding(&ld);
+}
+
+// LINUX-LABEL: define dso_local void @testArrayOfStruct(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_LOCAL:%.*]]], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrayOfStruct(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_LOCAL:%.*]]], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrayOfStruct(void) {
+  struct S_local {
+    int i1;
+    char c1;
+    int i2;
+    char c2;
+  };
+
+  // S[0].i1 [0-3], S[0].c1 [4], PAD [5-7],
+  // S[0].i2 [8-11], S[0].c2 [12], PAD [13-15],
+  // S[1].i1 [16-19], S[1].c1 [20], PAD [21-23],
+  // S[1].i2 [24-27], S[1].c2 [28], PAD [29-31]
+
+  struct S_local s[2];
+  __builtin_clear_padding(&s);
+}
+
+struct ArrOfStructsWithPadding {
+  struct Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// LINUX-LABEL: define dso_local void @testArrOfStructsWithPadding(
+// LINUX-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrOfStructsWithPadding(
+// WINDOWS-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrOfStructsWithPadding(struct ArrOfStructsWithPadding *arr) {
+  __builtin_clear_padding(arr);
+}
+
+// LINUX-LABEL: define dso_local void @testAtomic(
+// LINUX-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testAtomic(
+// WINDOWS-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAtomic(_Atomic(struct Bar)* bar) {
+  __builtin_clear_padding(bar);
+}
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index bb121fba9db17..62414c94dad3b 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -1036,12 +1036,10 @@ void testArrayNoPadding() {
   __builtin_clear_padding(&i);
 }
 
-// LINUX-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
-// LINUX-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
+// LINUX-LABEL: define dso_local void @_Z19testArrayLongDoublev(
+// LINUX-SAME: ) #[[ATTR0]] {
 // LINUX-NEXT:  [[ENTRY:.*:]]
-// LINUX-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
 // LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
-// LINUX-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
 // LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
@@ -1068,12 +1066,10 @@ void testArrayNoPadding() {
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    ret void
 //
-// WINDOWS-LABEL: define dso_local void @_Z19testArrayLongDoubleRA2_e(
-// WINDOWS-SAME: ptr noundef nonnull align 16 dereferenceable(32) [[ARR:%.*]]) #[[ATTR0]] {
+// WINDOWS-LABEL: define dso_local void @_Z19testArrayLongDoublev(
+// WINDOWS-SAME: ) #[[ATTR0]] {
 // WINDOWS-NEXT:  [[ENTRY:.*:]]
-// WINDOWS-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
 // WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
-// WINDOWS-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
@@ -1100,7 +1096,7 @@ void testArrayNoPadding() {
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    ret void
 //
-void testArrayLongDouble(long double (&arr)[2]) {
+void testArrayLongDouble() {
   // long double 0, [0-9] PAD [10-15]
   // long double 1, [16-25] PAD [26-31]
   long double ld[2];
@@ -1266,36 +1262,32 @@ void testTemplateStruct(S12<int>* s12) {
   __builtin_clear_padding(s12);
 }
 
-// LINUX-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3S12IiE(
-// LINUX-SAME: ptr noundef [[AS12:%.*]]) #[[ATTR0]] {
+// LINUX-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3Bar(
+// LINUX-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
 // LINUX-NEXT:  [[ENTRY:.*:]]
-// LINUX-NEXT:    [[AS12_ADDR:%.*]] = alloca ptr, align 8
-// LINUX-NEXT:    store ptr [[AS12]], ptr [[AS12_ADDR]], align 8
-// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AS12_ADDR]], align 8
-// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    ret void
 //
-// WINDOWS-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3S12IiE(
-// WINDOWS-SAME: ptr noundef [[AS12:%.*]]) #[[ATTR0]] {
+// WINDOWS-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3Bar(
+// WINDOWS-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
 // WINDOWS-NEXT:  [[ENTRY:.*:]]
-// WINDOWS-NEXT:    [[AS12_ADDR:%.*]] = alloca ptr, align 8
-// WINDOWS-NEXT:    store ptr [[AS12]], ptr [[AS12_ADDR]], align 8
-// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AS12_ADDR]], align 8
-// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    ret void
 //
-void testAtomic(_Atomic(S12<int>)* as12) {
-  __builtin_clear_padding(as12);
+void testAtomic(_Atomic(Bar)* bar) {
+  __builtin_clear_padding(bar);
 }
 
 
diff --git a/clang/test/Sema/builtin-clear-padding.c b/clang/test/Sema/builtin-clear-padding.c
index 4ec22fd5d5c70..14ab7476e94b1 100644
--- a/clang/test/Sema/builtin-clear-padding.c
+++ b/clang/test/Sema/builtin-clear-padding.c
@@ -2,17 +2,29 @@
 
 struct Foo {};
 
-struct Incomplete; // expected-note {{forward declaration of 'struct Incomplete'}}
-
-void test(int a, struct Foo b, void *c, int *d, struct Foo *e, const struct Foo *f, struct Incomplete *g) {
-  __builtin_clear_padding(); // expected-error {{too few arguments to function call, expected 1, have 0}}
-  __builtin_clear_padding(d, d); // expected-error {{too many arguments to function call, expected 1, have 2}}
-
+void test(int a, struct Foo b, int *d, struct Foo *e, const struct Foo *f) {
   __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
   __builtin_clear_padding(b); // expected-error {{passing 'struct Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('struct Foo' vs pointer)}}
-  __builtin_clear_padding(c); // expected-error {{variable has incomplete type 'void'}}
   __builtin_clear_padding(d); // This should not error.
   __builtin_clear_padding(e); // This should not error.
   __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
-  __builtin_clear_padding(g); // expected-error {{variable has incomplete type 'struct Incomplete'}}
+}
+
+struct Incomplete; // expected-note {{forward declaration of 'struct Incomplete'}}
+
+void testIncomplete(void* v, struct Incomplete *i) {
+  __builtin_clear_padding(v); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(i); // expected-error {{variable has incomplete type 'struct Incomplete'}}
+}
+
+void testNumArgs(int* i) {
+  __builtin_clear_padding(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+  __builtin_clear_padding(i); // This should not error.
+  __builtin_clear_padding(i, i); // expected-error {{too many arguments to function call, expected 1, have 2}}
+  __builtin_clear_padding(i, i, i); // expected-error {{too many arguments to function call, expected 1, have 3}}
+  __builtin_clear_padding(i, i, i, i); // expected-error {{too many arguments to function call, expected 1, have 4}}
+}
+
+void testFunctionPointer(void(*f)()) {
+  __builtin_clear_padding(f); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('void (*)()' invalid)}}
 }

>From 21cb236b32bf68e752921786f18880a82cffeb4c Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Mon, 2 Mar 2026 18:06:33 +0000
Subject: [PATCH 17/22] vla

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 73 +++++++------------
 clang/lib/Sema/SemaChecking.cpp               |  7 ++
 .../CodeGen/builtin-clear-padding-codegen.c   | 40 +++++++++-
 .../builtin-clear-padding-codegen.cpp         | 38 ++++++++++
 clang/test/Sema/builtin-clear-padding.c       | 21 ++++++
 clang/test/SemaCXX/builtin-clear-padding.cpp  | 20 +++++
 7 files changed, 155 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f8bf54e28994b..51bc712af3f09 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9683,6 +9683,8 @@ def warn_atomic_stripped_in_enum : Warning<
 def err_clear_padding_needs_trivial_copy : Error<
   "argument to __builtin_clear_padding must be a pointer to a "
   "trivially-copyable type (%0 invalid)">;
+def err_clear_padding_no_flexible_array : Error<
+  "%0 has flexible array member, which is unsupported by __builtin_clear_padding">;
 
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to %select{overflow builtin|checked integer operation}0 "
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f6104cbe2a7df..0a4d65c0c7811 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2752,13 +2752,8 @@ struct PaddingClearer {
       return;
     }
 
-    if (auto *Record = D.Ty->getAsCXXRecordDecl()) {
-      VisitCXXStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
-      return;
-    }
-
     if (auto *Record = D.Ty->getAsRecordDecl()) {
-      VisitCStruct(Record, D.StartBitOffset);
+      VisitStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
       return;
     }
 
@@ -2798,56 +2793,44 @@ struct PaddingClearer {
     }
   }
 
-  void VisitCXXStruct(const CXXRecordDecl *R, uint64_t StartBitOffset,
+  void VisitStruct(const RecordDecl *R, uint64_t StartBitOffset,
                    bool VisitVirtualBase) {
     const auto &DL = CGF.CGM.getModule().getDataLayout();
-
     const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
-    if (ASTLayout.hasOwnVFPtr()) {
-      OccuppiedIntervals.push_back(BitInterval{
-          StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
-    }
 
-    const auto VisitBase = [&ASTLayout, StartBitOffset, this](
-                               const CXXBaseSpecifier &Base, auto GetOffset) {
-      auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
-      if (!BaseRecord) {
-        return;
+    auto *CXXRecord = dyn_cast<CXXRecordDecl>(R);
+
+    if (CXXRecord) {
+      if (ASTLayout.hasOwnVFPtr()) {
+        OccuppiedIntervals.push_back(BitInterval{
+            StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
       }
-      auto BaseOffset =
-          std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
 
-      Stack.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
-                           Base.getType(), /*VisitVirtualBase*/ false});
-    };
+      const auto VisitBase = [&ASTLayout, StartBitOffset, this](
+                                 const CXXBaseSpecifier &Base, auto GetOffset) {
+        auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
+        if (!BaseRecord) {
+          return;
+        }
+        auto BaseOffset =
+            std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
 
-    for (auto Base : R->bases()) {
-      if (!Base.isVirtual()) {
-        VisitBase(Base, &ASTRecordLayout::getBaseClassOffset);
-      }
-    }
+        Stack.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
+                             Base.getType(), /*VisitVirtualBase*/ false});
+      };
 
-    if (VisitVirtualBase) {
-      for (auto VBase : R->vbases()) {
-        VisitBase(VBase, &ASTRecordLayout::getVBaseClassOffset);
+      for (auto Base : CXXRecord->bases()) {
+        if (!Base.isVirtual()) {
+          VisitBase(Base, &ASTRecordLayout::getBaseClassOffset);
+        }
       }
-    }
 
-    for (auto *Field : R->fields()) {
-      auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
-      if (Field->isBitField()) {
-        OccuppiedIntervals.push_back(BitInterval{
-            StartBitOffset + FieldOffset,
-            StartBitOffset + FieldOffset + Field->getBitWidthValue()});
-      } else {
-        Stack.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
-                             /*VisitVirtualBase*/ true});
+      if (VisitVirtualBase) {
+        for (auto VBase : CXXRecord->vbases()) {
+          VisitBase(VBase, &ASTRecordLayout::getVBaseClassOffset);
+        }
       }
     }
-  }
-
-  void VisitCStruct(const RecordDecl *R, uint64_t StartBitOffset) {
-    const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
 
     for (auto *Field : R->fields()) {
       auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
@@ -2857,7 +2840,7 @@ struct PaddingClearer {
             StartBitOffset + FieldOffset + Field->getBitWidthValue()});
       } else {
         Stack.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
-                             /*VisitVirtualBase*/ false});
+                             /*VisitVirtualBase*/ true});
       }
     }
   }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index df2ed87204255..851ed3128e938 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3180,6 +3180,13 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
       return ExprError();
     }
 
+    if (auto *Record = PointeeType->getAsRecordDecl();
+        Record && Record->hasFlexibleArrayMember()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_clear_padding_no_flexible_array)
+          << PointeeType << PtrArg->getSourceRange();
+      return ExprError();
+    }
+
     break;
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGen/builtin-clear-padding-codegen.c b/clang/test/CodeGen/builtin-clear-padding-codegen.c
index 84a476034cda0..0c59e8677cf3a 100644
--- a/clang/test/CodeGen/builtin-clear-padding-codegen.c
+++ b/clang/test/CodeGen/builtin-clear-padding-codegen.c
@@ -21,7 +21,7 @@ struct Empty {};
 // WINDOWS-NEXT:    ret void
 //
 void testEmpty(struct Empty *e) {
-  // This should clear the one byte that Empty occupies.
+  // An empty struct occupies no bytes in C, so there is no padding to clear.
   __builtin_clear_padding(e);
 }
 
@@ -754,3 +754,41 @@ void testArrOfStructsWithPadding(struct ArrOfStructsWithPadding *arr) {
 void testAtomic(_Atomic(struct Bar)* bar) {
   __builtin_clear_padding(bar);
 }
+
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+// LINUX-LABEL: define dso_local void @testAttributedType(
+// LINUX-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testAttributedType(
+// WINDOWS-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAttributedType(Float3Vec* v) {
+  __builtin_clear_padding(v);
+}
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index 62414c94dad3b..3563a8d04aa3b 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -1329,3 +1329,41 @@ struct NonTriviallyCopyable {
 void testNonTriviallyCopyable(NonTriviallyCopyable ntc) {
   __builtin_clear_padding(&ntc);
 }
+
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+// LINUX-LABEL: define dso_local void @_Z18testAttributedTypePDv3_f(
+// LINUX-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testAttributedTypePDv3_f(
+// WINDOWS-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAttributedType(Float3Vec* v) {
+  __builtin_clear_padding(v);
+}
diff --git a/clang/test/Sema/builtin-clear-padding.c b/clang/test/Sema/builtin-clear-padding.c
index 14ab7476e94b1..c0d83a522ac3e 100644
--- a/clang/test/Sema/builtin-clear-padding.c
+++ b/clang/test/Sema/builtin-clear-padding.c
@@ -28,3 +28,24 @@ void testNumArgs(int* i) {
 void testFunctionPointer(void(*f)()) {
   __builtin_clear_padding(f); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('void (*)()' invalid)}}
 }
+
+struct WithVLA {
+  int i;
+  char c[];
+};
+
+struct WithVLA2 {
+  int i2;
+  struct WithVLA w;
+};
+
+struct WithVLA3 {
+  struct WithVLA2 w2;
+};
+
+void testVLA(struct WithVLA* w1, struct WithVLA2* w2, struct WithVLA3* w3) {
+  __builtin_clear_padding(w1); // expected-error {{'struct WithVLA' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w2); // expected-error {{'struct WithVLA2' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w3); // expected-error {{'struct WithVLA3' has flexible array member, which is unsupported by __builtin_clear_padding}}
+}
+
diff --git a/clang/test/SemaCXX/builtin-clear-padding.cpp b/clang/test/SemaCXX/builtin-clear-padding.cpp
index c3546f5d0e33d..e03475a3af2ad 100644
--- a/clang/test/SemaCXX/builtin-clear-padding.cpp
+++ b/clang/test/SemaCXX/builtin-clear-padding.cpp
@@ -76,3 +76,23 @@ void testMemberPointer(Foo* Bar::*mp) {
 void testFunctionPointer(void(*f)()) {
   __builtin_clear_padding(f); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('void (*)()' invalid)}}
 }
+
+struct WithVLA {
+  int i;
+  char c[];
+};
+
+struct WithVLA2 {
+  int i2;
+  WithVLA w;
+};
+
+struct WithVLA3 {
+  WithVLA2 w2;
+};
+
+void testVLA(WithVLA* w1, WithVLA2* w2, WithVLA3* w3) {
+  __builtin_clear_padding(w1); // expected-error {{'WithVLA' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w2); // expected-error {{'WithVLA2' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w3); // expected-error {{'WithVLA3' has flexible array member, which is unsupported by __builtin_clear_padding}}
+}

>From deb25b3b2878b180b7f81d0e46fb1db04341cb31 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 7 Mar 2026 08:40:07 +0000
Subject: [PATCH 18/22] remove vla in codegen and libcxx tests

---
 .../builtin-clear-padding-codegen.cpp         | 47 -------------------
 .../atomics/builtin_clear_padding.pass.cpp    | 30 ------------
 2 files changed, 77 deletions(-)

diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index 3563a8d04aa3b..43a644c7f81cb 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -374,53 +374,6 @@ void testStructVolatile(volatile Baz *baz) {
   __builtin_clear_padding(baz);
 }
 
-struct UnsizedTail {
-  int size;
-  alignas(8) char buf[];
-
-  UnsizedTail(int size) : size(size) {}
-};
-
-// UnsizedTail structure:
-// "size", PAD_1, PAD_2, PAD_3, PAD_4
-// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
-
-// LINUX-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
-// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
-// LINUX-NEXT:  [[ENTRY:.*:]]
-// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
-// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
-// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
-// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// LINUX-NEXT:    ret void
-//
-// WINDOWS-LABEL: define dso_local void @_Z21testStructUnsizedTailP11UnsizedTail(
-// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
-// WINDOWS-NEXT:  [[ENTRY:.*:]]
-// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
-// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
-// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
-// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
-// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
-// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
-// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
-// WINDOWS-NEXT:    ret void
-//
-void testStructUnsizedTail(UnsizedTail *u) {
-  __builtin_clear_padding(u);
-}
-
 class S1 {
   int x;
   char c;
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index fad2b0981e487..52b4039ae484e 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -179,37 +179,7 @@ void testAllStructsForType(T a, T b, T c, T d) {
   }
 }
 
-struct UnsizedTail {
-  int size;
-  alignas(8) char buf[];
-
-  UnsizedTail(int size) : size(size) {}
-};
-
 void otherStructTests() {
-  // Unsized Tail
-  {
-    const size_t size1 = sizeof(UnsizedTail) + 4;
-    char buff1[size1];
-    char buff2[size1];
-    memset(buff1, 0, size1);
-    memset(buff2, 42, size1);
-    auto* u1   = new (buff1) UnsizedTail(4);
-    u1->buf[0] = 1;
-    u1->buf[1] = 2;
-    u1->buf[2] = 3;
-    u1->buf[3] = 4;
-    auto* u2   = new (buff2) UnsizedTail(4);
-    u2->buf[0] = 1;
-    u2->buf[1] = 2;
-    u2->buf[2] = 3;
-    u2->buf[3] = 4;
-    assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
-    __builtin_clear_padding(u2);
-
-    assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
-  }
-
   // basic padding on the heap
   {
     using B      = BasicWithPadding<8, 4, char>;

>From 51fd896da32fd191d64ce78a70d25b055dd263f7 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 7 Mar 2026 08:41:27 +0000
Subject: [PATCH 19/22] clang format

---
 clang/lib/CodeGen/CGBuiltin.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0a4d65c0c7811..275496ad8102a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2884,7 +2884,8 @@ struct PaddingClearer {
     OccuppiedIntervals = Merged;
   }
 
-  llvm::SmallVector<BitInterval> GetPaddingIntervals(uint64_t SizeInBits) const {
+  llvm::SmallVector<BitInterval>
+  GetPaddingIntervals(uint64_t SizeInBits) const {
     llvm::SmallVector<BitInterval> Results;
     if (OccuppiedIntervals.size() == 1 &&
         OccuppiedIntervals.front().First == 0 &&

>From 342fe51e090cd9b82edbc188437b4fd77925f2aa Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Fri, 1 May 2026 18:30:15 +0100
Subject: [PATCH 20/22] address review comments

---
 clang/lib/CodeGen/CGBuiltin.cpp               |  27 ++-
 .../CodeGen/builtin-clear-padding-codegen.c   | 144 ++++++------
 .../builtin-clear-padding-codegen.cpp         | 216 +++++++++---------
 3 files changed, 198 insertions(+), 189 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 275496ad8102a..fc728217c9e59 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2714,7 +2714,7 @@ struct PaddingClearer {
   PaddingClearer(CodeGenFunction &F)
       : CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}
 
-  void run(Value *Ptr, QualType Ty) {
+  void run(Address Src, QualType Ty) {
     OccuppiedIntervals.clear();
     Stack.clear();
 
@@ -2729,7 +2729,7 @@ struct PaddingClearer {
     auto PaddingIntervals =
         GetPaddingIntervals(CGF.getContext().getTypeSize(Ty));
     for (const auto &Interval : PaddingIntervals) {
-      ClearPadding(Ptr, Interval);
+      ClearPadding(Src, Interval);
     }
   }
 
@@ -2906,8 +2906,9 @@ struct PaddingClearer {
     return Results;
   }
 
-  void ClearPadding(Value *Ptr, const BitInterval &PaddingInterval) {
-    auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+  void ClearPadding(Address Src, const BitInterval &PaddingInterval) {
+    auto *I8Ptr =
+        CGF.Builder.CreateBitCast(Src.getBasePointer(), CGF.Int8PtrTy);
     auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
 
     // Calculate byte indices and bit positions
@@ -2920,7 +2921,9 @@ struct PaddingClearer {
       // Interval is within a single byte
       auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
       auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-      Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+      Address ElementAddr(Element, CGF.Int8Ty,
+                          Src.getAlignment().alignmentAtOffset(
+                              CharUnits::fromQuantity(StartByte)));
 
       auto *Value = CGF.Builder.CreateLoad(ElementAddr);
 
@@ -2937,7 +2940,9 @@ struct PaddingClearer {
       if (StartBit != 0) {
         auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
         auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-        Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+        Address ElementAddr(Element, CGF.Int8Ty,
+                            Src.getAlignment().alignmentAtOffset(
+                                CharUnits::fromQuantity(StartByte)));
 
         auto *Value = CGF.Builder.CreateLoad(ElementAddr);
 
@@ -2954,7 +2959,9 @@ struct PaddingClearer {
       for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
         auto *Index = ConstantInt::get(CGF.IntTy, Offset);
         auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-        Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+        Address ElementAddr(Element, CGF.Int8Ty,
+                            Src.getAlignment().alignmentAtOffset(
+                                CharUnits::fromQuantity(Offset)));
 
         CGF.Builder.CreateStore(Zero, ElementAddr);
       }
@@ -2963,7 +2970,9 @@ struct PaddingClearer {
       if (EndBit != 0) {
         auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
         auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
-        Address ElementAddr(Element, CGF.Int8Ty, CharUnits::One());
+        Address ElementAddr(Element, CGF.Int8Ty,
+                            Src.getAlignment().alignmentAtOffset(
+                                CharUnits::fromQuantity(EndByte)));
 
         auto *Value = CGF.Builder.CreateLoad(ElementAddr);
 
@@ -5480,7 +5489,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Address Src = EmitPointerWithAlignment(E->getArg(0));
     auto PointeeTy = E->getArg(0)->getType()->getPointeeType();
     PaddingClearer clearer{*this};
-    clearer.run(Src.getBasePointer(), PointeeTy);
+    clearer.run(Src, PointeeTy);
     return RValue::get(nullptr);
   }
   case Builtin::BI__sync_fetch_and_add:
diff --git a/clang/test/CodeGen/builtin-clear-padding-codegen.c b/clang/test/CodeGen/builtin-clear-padding-codegen.c
index 0c59e8677cf3a..32086250cf67f 100644
--- a/clang/test/CodeGen/builtin-clear-padding-codegen.c
+++ b/clang/test/CodeGen/builtin-clear-padding-codegen.c
@@ -55,15 +55,15 @@ void testPrimitiveNoPadding(int *i) {
 // LINUX-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    ret void
@@ -75,15 +75,15 @@ void testPrimitiveNoPadding(int *i) {
 // WINDOWS-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    ret void
@@ -101,27 +101,27 @@ void testPrimitiveLongDouble(long double *ld) {
 // LINUX-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 4
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 2
 // LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
 // LINUX-NEXT:    ret void
@@ -133,27 +133,27 @@ void testPrimitiveLongDouble(long double *ld) {
 // WINDOWS-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 4
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 2
 // WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
 // WINDOWS-NEXT:    ret void
@@ -207,15 +207,15 @@ union U2 {
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 4
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    ret void
@@ -229,15 +229,15 @@ union U2 {
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 4
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    ret void
@@ -284,7 +284,7 @@ struct __attribute__((aligned(4))) Baz {
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -306,7 +306,7 @@ struct __attribute__((aligned(4))) Baz {
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -333,7 +333,7 @@ void testStructPaddingInBetweenMembers(struct Baz *baz) {
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -355,7 +355,7 @@ void testStructPaddingInBetweenMembers(struct Baz *baz) {
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -384,45 +384,45 @@ struct S3 {
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
-// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 2
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
-// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 4
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
-// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 2
 // LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
 // LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 1
 // LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
-// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 8
 // LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
 // LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 1
 // LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 2
 // LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 1
 // LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 4
 // LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 1
 // LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 2
 // LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 1
 // LINUX-NEXT:    ret void
@@ -434,45 +434,45 @@ struct S3 {
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 2
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 4
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 2
 // WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 1
 // WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 8
 // WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 1
 // WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 2
 // WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 1
 // WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 4
 // WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 1
 // WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 2
 // WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 1
 // WINDOWS-NEXT:    ret void
@@ -544,27 +544,27 @@ void testArrayNoPadding(void) {
 // LINUX-NEXT:  [[ENTRY:.*:]]
 // LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 4
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
-// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
-// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 4
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    ret void
@@ -574,27 +574,27 @@ void testArrayNoPadding(void) {
 // WINDOWS-NEXT:  [[ENTRY:.*:]]
 // WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 4
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 4
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    ret void
@@ -613,25 +613,25 @@ void testArrayLongDouble() {
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    ret void
@@ -643,25 +643,25 @@ void testArrayLongDouble() {
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    ret void
@@ -764,11 +764,11 @@ typedef float Float3Vec __attribute__((ext_vector_type(3)));
 // LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 4
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 2
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    ret void
@@ -780,11 +780,11 @@ typedef float Float3Vec __attribute__((ext_vector_type(3)));
 // WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 4
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 2
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    ret void
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index 43a644c7f81cb..f4ed888f1b0d6 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -60,15 +60,15 @@ void testPrimitiveNoPadding(int *i) {
 // LINUX-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    ret void
@@ -80,15 +80,15 @@ void testPrimitiveNoPadding(int *i) {
 // WINDOWS-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    ret void
@@ -106,27 +106,27 @@ void testPrimitiveLongDouble(long double *ld) {
 // LINUX-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 4
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 2
 // LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
 // LINUX-NEXT:    ret void
@@ -138,27 +138,27 @@ void testPrimitiveLongDouble(long double *ld) {
 // WINDOWS-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 4
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 2
 // WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
 // WINDOWS-NEXT:    ret void
@@ -213,15 +213,15 @@ union U2 {
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 4
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    ret void
@@ -235,15 +235,15 @@ union U2 {
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 4
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    ret void
@@ -289,7 +289,7 @@ struct alignas(4) Baz : Foo {
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -311,7 +311,7 @@ struct alignas(4) Baz : Foo {
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -338,7 +338,7 @@ void testStructPaddingInBetweenMembers(Baz *baz) {
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -360,7 +360,7 @@ void testStructPaddingInBetweenMembers(Baz *baz) {
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
@@ -391,7 +391,7 @@ class S2 {
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    ret void
@@ -403,7 +403,7 @@ class S2 {
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    ret void
@@ -425,45 +425,45 @@ struct S3 {
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
-// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 2
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
-// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 4
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
-// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 2
 // LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
 // LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 1
 // LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
-// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 8
 // LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
 // LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 1
 // LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 2
 // LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 1
 // LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 4
 // LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 1
 // LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 2
 // LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 1
 // LINUX-NEXT:    ret void
@@ -475,45 +475,45 @@ struct S3 {
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 2
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 4
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 2
 // WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 1
 // WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 8
 // WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 1
 // WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 2
 // WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 1
 // WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 4
 // WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 1
 // WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 2
 // WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 1
 // WINDOWS-NEXT:    ret void
@@ -539,7 +539,7 @@ struct S4 : Empty, B {
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    ret void
@@ -553,7 +553,7 @@ struct S4 : Empty, B {
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    ret void
@@ -581,13 +581,13 @@ struct S5 : B1, B2 {
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    ret void
@@ -601,13 +601,13 @@ struct S5 : B1, B2 {
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    ret void
@@ -636,13 +636,13 @@ struct S6 : B3, B4 {
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    ret void
@@ -654,13 +654,13 @@ struct S6 : B3, B4 {
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    ret void
@@ -692,7 +692,7 @@ struct S7 : VirtualBase, NonVirtualBase {
 // LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    ret void
@@ -703,7 +703,7 @@ struct S7 : VirtualBase, NonVirtualBase {
 // WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    ret void
@@ -745,23 +745,23 @@ struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 {
 // LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 4
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
-// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 2
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    ret void
@@ -772,23 +772,23 @@ struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 {
 // WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 4
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 2
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    ret void
@@ -832,7 +832,7 @@ struct S9 : NonVirtualBase2, VirtualChain3 {
 // LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
 // LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    ret void
@@ -843,7 +843,7 @@ struct S9 : NonVirtualBase2, VirtualChain3 {
 // WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
 // WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    ret void
@@ -882,19 +882,19 @@ struct S10 : D1, D2 {
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 2
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
-// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 4
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
-// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
 // LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // LINUX-NEXT:    ret void
@@ -907,19 +907,19 @@ struct S10 : D1, D2 {
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 2
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 4
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // WINDOWS-NEXT:    ret void
@@ -994,27 +994,27 @@ void testArrayNoPadding() {
 // LINUX-NEXT:  [[ENTRY:.*:]]
 // LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
-// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 4
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
-// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
 // LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
-// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 4
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    ret void
@@ -1024,27 +1024,27 @@ void testArrayNoPadding() {
 // WINDOWS-NEXT:  [[ENTRY:.*:]]
 // WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 4
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 4
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    ret void
@@ -1063,25 +1063,25 @@ void testArrayLongDouble() {
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
 // LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // LINUX-NEXT:    ret void
@@ -1093,25 +1093,25 @@ void testArrayLongDouble() {
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
 // WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
 // WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
 // WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
 // WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
 // WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
 // WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
 // WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
 // WINDOWS-NEXT:    ret void
@@ -1192,7 +1192,7 @@ struct S12 {
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // LINUX-NEXT:    ret void
@@ -1206,7 +1206,7 @@ struct S12 {
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
 // WINDOWS-NEXT:    ret void
@@ -1261,7 +1261,7 @@ struct NonTriviallyCopyable {
 // LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
 // LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    ret void
@@ -1274,7 +1274,7 @@ struct NonTriviallyCopyable {
 // WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    ret void
@@ -1292,11 +1292,11 @@ typedef float Float3Vec __attribute__((ext_vector_type(3)));
 // LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
 // LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
 // LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 4
 // LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 2
 // LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // LINUX-NEXT:    ret void
@@ -1308,11 +1308,11 @@ typedef float Float3Vec __attribute__((ext_vector_type(3)));
 // WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
 // WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 4
 // WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
 // WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
-// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 2
 // WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
 // WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
 // WINDOWS-NEXT:    ret void

>From d04785d759eb06774ee7b4fa76c754d1af3df5f3 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Fri, 1 May 2026 19:05:14 +0100
Subject: [PATCH 21/22] ci

---
 libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 52b4039ae484e..2bc6917b681c0 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // UNSUPPORTED: c++03
 // UNSUPPORTED: gcc
-// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22, apple-clang-17
+// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22, clang-23, apple-clang-17, apple-clang-21
 
 // ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
 

>From 188c60aa9a492be8f8899edca9f4b7bdd5842d93 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 2 May 2026 10:46:25 +0100
Subject: [PATCH 22/22] bitint

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 30 +++++++++++---
 clang/lib/Sema/SemaChecking.cpp               |  8 ++--
 .../CodeGen/builtin-clear-padding-codegen.c   | 41 +++++++++++++++++++
 .../builtin-clear-padding-codegen.cpp         | 41 +++++++++++++++++++
 .../atomics/builtin_clear_padding.pass.cpp    | 14 +++++++
 5 files changed, 124 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fc728217c9e59..01cf2ea90abfa 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -28,6 +28,7 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Intrinsics.h"
@@ -2746,6 +2747,27 @@ struct PaddingClearer {
     bool VisitVirtualBase;
   };
 
+  uint64_t getScalarOccupiedSizeInBits(QualType Ty) const { // non-padding bits
+    if (const auto *BIT = Ty->getAs<BitIntType>())
+      return BIT->getNumBits(); // _BitInt(N): the N declared bits only
+    // x87 long double: use the format's bit width, not the storage size.
+    if (const auto *BT = Ty->getAs<BuiltinType>()) {
+      if (BT->getKind() == BuiltinType::LongDouble &&
+          &CGF.getTarget().getLongDoubleFormat() ==
+              &APFloat::x87DoubleExtended())
+        return APFloat::getSizeInBits(CGF.getTarget().getLongDoubleFormat());
+    }
+    // Vectors: element width times element count (1 bit each if packed bool).
+    if (const auto *VT = Ty->getAs<clang::VectorType>()) {
+      ASTContext &Ctx = CGF.getContext();
+      if (VT->isPackedVectorBoolType(Ctx))
+        return VT->getNumElements();
+      return Ctx.getTypeInfo(VT->getElementType()).Width * VT->getNumElements();
+    }
+    // Any other scalar occupies its full ASTContext type size.
+    return CGF.getContext().getTypeSize(Ty);
+  }
+
   void Visit(const Data &D) {
     if (auto *AT = dyn_cast<ConstantArrayType>(D.Ty)) {
       VisitArray(AT, D.StartBitOffset);
@@ -2769,11 +2791,7 @@ struct PaddingClearer {
       return;
     }
 
-    auto *Type = CGF.ConvertTypeForMem(D.Ty);
-    auto SizeBit = CGF.CGM.getModule()
-                       .getDataLayout()
-                       .getTypeSizeInBits(Type)
-                       .getKnownMinValue();
+    uint64_t SizeBit = getScalarOccupiedSizeInBits(D.Ty);
     OccuppiedIntervals.push_back(
         BitInterval{D.StartBitOffset, D.StartBitOffset + SizeBit});
   }
@@ -2889,7 +2907,7 @@ struct PaddingClearer {
     llvm::SmallVector<BitInterval> Results;
     if (OccuppiedIntervals.size() == 1 &&
         OccuppiedIntervals.front().First == 0 &&
-        OccuppiedIntervals.end()->Last == SizeInBits) {
+        OccuppiedIntervals.front().Last == SizeInBits) {
       return Results;
     }
     Results.reserve(OccuppiedIntervals.size() + 1);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 851ed3128e938..94164b75d510a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3150,13 +3150,13 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     // i.e. __builtin_clear_padding(&var) is OK as long as var is a complete
     // object, either a local variable or a function parameter passed by value
     auto IsAddrOfDeclExpr = [&]() {
-      const Expr *IgnoreCastsAndParens = PtrArg->IgnoreCasts();
-      IgnoreCastsAndParens = IgnoreCastsAndParens->IgnoreParens();
-      const auto *UnaryOp = dyn_cast<UnaryOperator>(IgnoreCastsAndParens);
+      const Expr *Inner = PtrArg->IgnoreParenNoopCasts(Context);
+      const auto *UnaryOp = dyn_cast<UnaryOperator>(Inner);
       if (!UnaryOp || UnaryOp->getOpcode() != UO_AddrOf)
         return false;
 
-      const Expr *Operand = UnaryOp->getSubExpr()->IgnoreParens();
+      const Expr *Operand =
+          UnaryOp->getSubExpr()->IgnoreParenNoopCasts(Context);
       const auto *DeclRef = dyn_cast<DeclRefExpr>(Operand);
       if (!DeclRef)
         return false;
diff --git a/clang/test/CodeGen/builtin-clear-padding-codegen.c b/clang/test/CodeGen/builtin-clear-padding-codegen.c
index 32086250cf67f..74846d7d95d6a 100644
--- a/clang/test/CodeGen/builtin-clear-padding-codegen.c
+++ b/clang/test/CodeGen/builtin-clear-padding-codegen.c
@@ -93,6 +93,47 @@ void testPrimitiveLongDouble(long double *ld) {
   __builtin_clear_padding(ld);
 }
 
+// LINUX-LABEL: define dso_local void @testBitInt(
+// LINUX-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testBitInt(
+// WINDOWS-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitInt(_BitInt(97) *bi) {
+  // _BitInt(97) is stored in 128 bits: keep bit 96, clear bits [97, 128).
+  __builtin_clear_padding(bi);
+}
+
 
 // LINUX-LABEL: define dso_local void @testPrimitiveComplexLongDouble(
 // LINUX-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
diff --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
index f4ed888f1b0d6..5db0699636200 100644
--- a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -98,6 +98,47 @@ void testPrimitiveLongDouble(long double *ld) {
   __builtin_clear_padding(ld);
 }
 
+// LINUX-LABEL: define dso_local void @_Z10testBitIntPDB97_(
+// LINUX-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z10testBitIntPDB97_(
+// WINDOWS-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitInt(_BitInt(97) *bi) {
+  // _BitInt(97) is stored in 128 bits: keep bit 96, clear bits [97, 128).
+  __builtin_clear_padding(bi);
+}
+
 
 // LINUX-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
 // LINUX-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
diff --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
index 2bc6917b681c0..6004ce8496a23 100644
--- a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -248,6 +248,20 @@ void primitiveTests() {
     assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
   }
 
+  // _BitInt
+  {
+    using T = _BitInt(97);
+    T i1, i2;
+    memset(&i1, 42, sizeof(T));
+    memset(&i2, 0, sizeof(T));
+
+    i1 = 37;
+    i2 = 37;
+    __builtin_clear_padding(&i1);
+    assert(i1 == 37);
+    assert(memcmp(&i1, &i2, sizeof(T)) == 0);
+  }
+
   // _Complex
   {
     _Complex long double c1, c2;



More information about the libcxx-commits mailing list