[libcxx-commits] [libcxx] 698d44b - [clang] Add builtin to clear padding bytes (prework for P0528R3) (#75371)

via libcxx-commits libcxx-commits at lists.llvm.org
Wed May 27 00:48:37 PDT 2026


Author: Hui
Date: 2026-05-27T08:48:30+01:00
New Revision: 698d44bf9ff61cd8c924bcf406c29c9988d5ea5e

URL: https://github.com/llvm/llvm-project/commit/698d44bf9ff61cd8c924bcf406c29c9988d5ea5e
DIFF: https://github.com/llvm/llvm-project/commit/698d44bf9ff61cd8c924bcf406c29c9988d5ea5e.diff

LOG: [clang] Add builtin to clear padding bytes (prework for P0528R3) (#75371)

Add a builtin, `__builtin_clear_padding`, that clears the padding of the
object its pointer argument points to. This is pre-work for implementing a
`std::atomic::compare_exchange_[weak/strong]` that ignores padding bits.
PR draft here: https://github.com/llvm/llvm-project/pull/76180

This PR picks up a patch from 3 years ago:
https://reviews.llvm.org/D87974

That patch no longer works because the code base has changed considerably
since then, so I've made some changes on top of it.


It handles:
- structs
- builtin types with padding (like `long double` and types with
  `__attribute__((ext_vector_type(N)))`)
- `_Complex long double`
- constant arrays
- unions
- bit-fields
- types with virtual functions
- types with virtual bases

---------

Co-authored-by: zoecarver <z.zoelec2 at gmail.com>

Added: 
    clang/test/CodeGen/builtin-clear-padding-codegen.c
    clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
    clang/test/Sema/builtin-clear-padding.c
    clang/test/SemaCXX/builtin-clear-padding.cpp
    libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp

Modified: 
    clang/include/clang/Basic/Builtins.td
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 84799929cee87..8d2a824ef5610 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1190,6 +1190,12 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
   let Prototype = "bool(void*)";
 }
 
+def ClearPadding : Builtin {
+  let Spellings = ["__builtin_clear_padding"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(void*)";
+}
+
 def GetVtablePointer : LangBuiltin<"CXX_LANG"> {
   let Spellings = ["__builtin_get_vtable_pointer"];
   let Attributes = [CustomTypeChecking, NoThrow, Const];

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 5856a02bfbda1..e330ea03d0544 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9678,6 +9678,11 @@ def warn_atomic_stripped_in_enum : Warning<
   "'_Atomic' qualifier ignored; operations involving the enumeration type will "
   "be non-atomic">,
   InGroup<DiagGroup<"underlying-atomic-qualifier-ignored">>, DefaultError;
+def err_clear_padding_needs_trivial_copy : Error<
+  "argument to __builtin_clear_padding must be a pointer to a "
+  "trivially-copyable type (%0 invalid)">;
+def err_clear_padding_no_flexible_array : Error<
+  "%0 has flexible array member, which is unsupported by __builtin_clear_padding">;
 
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to %select{overflow builtin|checked integer operation}0 "

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b1d727cb5e0ad..f4ded5c7b3f08 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -28,6 +28,7 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Intrinsics.h"
@@ -35,6 +36,7 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include <algorithm>
 #include <optional>
 #include <utility>
 
@@ -2669,6 +2671,341 @@ RValue CodeGenFunction::emitStdcFirstBit(const CallExpr *E, Intrinsic::ID IntID,
   return RValue::get(Result);
 }
 
+namespace {
+
+// PaddingClearer is a utility class that clears the padding bits of an
+// object of a C/C++ type. It walks the type's sub-objects using an explicit
+// work stack, collecting the occupied bit intervals, and then computes the
+// padding intervals as the gaps between them. Finally, it writes zeros to
+// the padding intervals byte by byte. If a byte is only partially padding,
+// it clears only the padding bits of that byte. This is the case for,
+// e.g., bit-fields.
+struct PaddingClearer {
+  PaddingClearer(CodeGenFunction &F)
+      : CGF(F), CharWidth(CGF.getContext().getCharWidth()) {}
+
+  void run(Address Src, QualType Ty) {
+    OccuppiedIntervals.clear();
+    Stack.clear();
+
+    Stack.push_back(Data{0, Ty, true});
+    while (!Stack.empty()) {
+      auto Current = Stack.back();
+      Stack.pop_back();
+      Visit(Current);
+    }
+
+    MergeOccuppiedIntervals();
+    auto PaddingIntervals =
+        GetPaddingIntervals(CGF.getContext().getTypeSize(Ty));
+    for (const auto &Interval : PaddingIntervals) {
+      ClearPadding(Src, Interval);
+    }
+  }
+
+private:
+  struct BitInterval {
+    // [First, Last)
+    uint64_t First;
+    uint64_t Last;
+  };
+
+  struct Data {
+    uint64_t StartBitOffset;
+    QualType Ty;
+    bool VisitVirtualBase;
+  };
+
+  // Return the number of non padding bits of a scalar type.
+  //
+  // The property that we specifically care about here is whether the scalar
+  // type has padding bits, i.e. are there bits in the type which are not
+  // specified by the ABI.
+  //
+  // We currently don't care about this anywhere else in clang: layout cares
+  // about the ABI size, calling convention code cares about specific types, but
+  // nothing cares about padding specifically. And it's not something we can
+  // easily query from LLVM due to the type system mismatches.
+  // DL.getTypeSizeInBits(convertTypeForLoadStore(T)) is probably close, but the
+  // DataLayout methods aren't really designed for this usage.
+  //
+  // Therefore, it is better to explicitly list all the scalar types containing
+  // padding bits that we know of, namely, _BitInt(N) and x87 long double.
+  uint64_t getScalarOccupiedSizeInBits(QualType Ty) const {
+    if (const auto *BIT = Ty->getAs<BitIntType>())
+      return BIT->getNumBits();
+
+    if (const auto *BT = Ty->getAs<BuiltinType>()) {
+      if (BT->getKind() == BuiltinType::LongDouble &&
+          &CGF.getTarget().getLongDoubleFormat() ==
+              &APFloat::x87DoubleExtended())
+        return APFloat::getSizeInBits(CGF.getTarget().getLongDoubleFormat());
+    }
+
+    return CGF.getContext().getTypeSize(Ty);
+  }
+
+  void Visit(const Data &D) {
+    if (auto *AT = dyn_cast<ConstantArrayType>(D.Ty)) {
+      VisitArray(AT, D.StartBitOffset);
+      return;
+    }
+
+    if (auto *Record = D.Ty->getAsRecordDecl()) {
+      VisitStruct(Record, D.StartBitOffset, D.VisitVirtualBase);
+      return;
+    }
+
+    if (D.Ty->isAtomicType()) {
+      auto Unwrapped = D;
+      Unwrapped.Ty = D.Ty.getAtomicUnqualifiedType();
+      Stack.push_back(Unwrapped);
+      return;
+    }
+
+    if (const auto *Complex = D.Ty->getAs<ComplexType>()) {
+      VisitComplex(Complex, D.StartBitOffset);
+      return;
+    }
+
+    if (const auto *VT = D.Ty->getAs<clang::VectorType>()) {
+      VisitVector(VT, D.StartBitOffset);
+      return;
+    }
+
+    uint64_t SizeBit = getScalarOccupiedSizeInBits(D.Ty);
+    OccuppiedIntervals.push_back(
+        BitInterval{D.StartBitOffset, D.StartBitOffset + SizeBit});
+  }
+
+  void VisitArray(const ConstantArrayType *AT, uint64_t StartBitOffset) {
+    for (uint64_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+         ++ArrIndex) {
+
+      QualType ElementQualType = AT->getElementType();
+      auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
+      auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
+      auto Offset = ElementSize.alignTo(ElementAlign);
+
+      Stack.push_back(
+          Data{StartBitOffset + ArrIndex * Offset.getQuantity() * CharWidth,
+               ElementQualType, /*VisitVirtualBase*/ true});
+    }
+  }
+
+  void VisitStruct(const RecordDecl *R, uint64_t StartBitOffset,
+                   bool VisitVirtualBase) {
+    const auto &DL = CGF.CGM.getModule().getDataLayout();
+    const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+
+    auto *CXXRecord = dyn_cast<CXXRecordDecl>(R);
+
+    if (CXXRecord) {
+      if (ASTLayout.hasOwnVFPtr()) {
+        OccuppiedIntervals.push_back(BitInterval{
+            StartBitOffset, StartBitOffset + DL.getPointerSizeInBits()});
+      }
+
+      const auto VisitBase = [&ASTLayout, StartBitOffset, this](
+                                 const CXXBaseSpecifier &Base, auto GetOffset) {
+        auto *BaseRecord = Base.getType()->getAsCXXRecordDecl();
+        if (!BaseRecord) {
+          return;
+        }
+        auto BaseOffset =
+            std::invoke(GetOffset, ASTLayout, BaseRecord).getQuantity();
+
+        Stack.push_back(Data{StartBitOffset + BaseOffset * CharWidth,
+                             Base.getType(), /*VisitVirtualBase*/ false});
+      };
+
+      for (auto Base : CXXRecord->bases()) {
+        if (!Base.isVirtual()) {
+          VisitBase(Base, &ASTRecordLayout::getBaseClassOffset);
+        }
+      }
+
+      if (VisitVirtualBase) {
+        for (auto VBase : CXXRecord->vbases()) {
+          VisitBase(VBase, &ASTRecordLayout::getVBaseClassOffset);
+        }
+      }
+    }
+
+    for (auto *Field : R->fields()) {
+      auto FieldOffset = ASTLayout.getFieldOffset(Field->getFieldIndex());
+      if (Field->isBitField()) {
+        OccuppiedIntervals.push_back(BitInterval{
+            StartBitOffset + FieldOffset,
+            StartBitOffset + FieldOffset + Field->getBitWidthValue()});
+      } else {
+        Stack.push_back(Data{StartBitOffset + FieldOffset, Field->getType(),
+                             /*VisitVirtualBase*/ true});
+      }
+    }
+  }
+
+  void VisitComplex(const ComplexType *CT, uint64_t StartBitOffset) {
+    QualType ElementQualType = CT->getElementType();
+    auto ElementSize = CGF.getContext().getTypeSizeInChars(ElementQualType);
+    auto ElementAlign = CGF.getContext().getTypeAlignInChars(ElementQualType);
+    auto ImgOffset = ElementSize.alignTo(ElementAlign);
+
+    Stack.push_back(
+        Data{StartBitOffset, ElementQualType, /*VisitVirtualBase*/ true});
+    Stack.push_back(Data{StartBitOffset + ImgOffset.getQuantity() * CharWidth,
+                         ElementQualType, /*VisitVirtualBase*/ true});
+  }
+
+  void VisitVector(const clang::VectorType *VT, uint64_t StartBitOffset) {
+    ASTContext &Ctx = CGF.getContext();
+    uint64_t SizeBit = [&]() -> uint64_t {
+      if (VT->isPackedVectorBoolType(Ctx))
+        return VT->getNumElements();
+      return getScalarOccupiedSizeInBits(VT->getElementType()) *
+             VT->getNumElements();
+    }();
+    OccuppiedIntervals.push_back(
+        BitInterval{StartBitOffset, StartBitOffset + SizeBit});
+  }
+
+  void MergeOccuppiedIntervals() {
+    std::sort(OccuppiedIntervals.begin(), OccuppiedIntervals.end(),
+              [](const BitInterval &lhs, const BitInterval &rhs) {
+                return std::tie(lhs.First, lhs.Last) <
+                       std::tie(rhs.First, rhs.Last);
+              });
+
+    llvm::SmallVector<BitInterval> Merged;
+    Merged.reserve(OccuppiedIntervals.size());
+
+    for (const BitInterval &NextInterval : OccuppiedIntervals) {
+      if (Merged.empty()) {
+        Merged.push_back(NextInterval);
+        continue;
+      }
+      auto &LastInterval = Merged.back();
+
+      if (NextInterval.First > LastInterval.Last) {
+        Merged.push_back(NextInterval);
+      } else {
+        LastInterval.Last = std::max(LastInterval.Last, NextInterval.Last);
+      }
+    }
+
+    OccuppiedIntervals = Merged;
+  }
+
+  llvm::SmallVector<BitInterval>
+  GetPaddingIntervals(uint64_t SizeInBits) const {
+    llvm::SmallVector<BitInterval> Results;
+    if (OccuppiedIntervals.size() == 1 &&
+        OccuppiedIntervals.front().First == 0 &&
+        OccuppiedIntervals.front().Last == SizeInBits) {
+      return Results;
+    }
+    Results.reserve(OccuppiedIntervals.size() + 1);
+    uint64_t CurrentPos = 0;
+    for (const BitInterval &OccupiedInterval : OccuppiedIntervals) {
+      if (OccupiedInterval.First > CurrentPos) {
+        Results.push_back(BitInterval{CurrentPos, OccupiedInterval.First});
+      }
+      CurrentPos = OccupiedInterval.Last;
+    }
+    if (SizeInBits > CurrentPos) {
+      Results.push_back(BitInterval{CurrentPos, SizeInBits});
+    }
+    return Results;
+  }
+
+  void ClearPadding(Address Src, const BitInterval &PaddingInterval) {
+    auto *I8Ptr =
+        CGF.Builder.CreateBitCast(Src.getBasePointer(), CGF.Int8PtrTy);
+    auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+
+    // Calculate byte indices and bit positions
+    auto StartByte = PaddingInterval.First / CharWidth;
+    auto StartBit = PaddingInterval.First % CharWidth;
+    auto EndByte = PaddingInterval.Last / CharWidth;
+    auto EndBit = PaddingInterval.Last % CharWidth;
+
+    if (StartByte == EndByte) {
+      // Interval is within a single byte
+      auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+      auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+      Address ElementAddr(Element, CGF.Int8Ty,
+                          Src.getAlignment().alignmentAtOffset(
+                              CharUnits::fromQuantity(StartByte)));
+
+      auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+
+      // Create mask to clear bits within the byte
+      // We want to clear bits from StartBit to EndBit-1
+      uint8_t bitsToClear = ((1 << EndBit) - 1) & ~((1 << StartBit) - 1);
+      uint8_t bitsToKeep = ~bitsToClear;
+      auto *MaskValue = ConstantInt::get(CGF.Int8Ty, bitsToKeep);
+      auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+
+      CGF.Builder.CreateStore(NewValue, ElementAddr);
+    } else {
+      // Handle the start byte
+      if (StartBit != 0) {
+        auto *Index = ConstantInt::get(CGF.IntTy, StartByte);
+        auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+        Address ElementAddr(Element, CGF.Int8Ty,
+                            Src.getAlignment().alignmentAtOffset(
+                                CharUnits::fromQuantity(StartByte)));
+
+        auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+
+        uint8_t bitsToClear = ((1 << (CharWidth - StartBit)) - 1) << StartBit;
+        uint8_t bitsToKeep = ~bitsToClear;
+        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, bitsToKeep);
+        auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+
+        CGF.Builder.CreateStore(NewValue, ElementAddr);
+        ++StartByte;
+      }
+
+      // Handle full bytes in the middle
+      for (auto Offset = StartByte; Offset < EndByte; ++Offset) {
+        auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+        auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+        Address ElementAddr(Element, CGF.Int8Ty,
+                            Src.getAlignment().alignmentAtOffset(
+                                CharUnits::fromQuantity(Offset)));
+
+        CGF.Builder.CreateStore(Zero, ElementAddr);
+      }
+
+      // Handle the end byte
+      if (EndBit != 0) {
+        auto *Index = ConstantInt::get(CGF.IntTy, EndByte);
+        auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+        Address ElementAddr(Element, CGF.Int8Ty,
+                            Src.getAlignment().alignmentAtOffset(
+                                CharUnits::fromQuantity(EndByte)));
+
+        auto *Value = CGF.Builder.CreateLoad(ElementAddr);
+
+        uint8_t bitsToClear = (1 << EndBit) - 1;
+        uint8_t bitsToKeep = ~bitsToClear;
+        auto *MaskValue = ConstantInt::get(CGF.Int8Ty, bitsToKeep);
+        auto *NewValue = CGF.Builder.CreateAnd(Value, MaskValue);
+
+        CGF.Builder.CreateStore(NewValue, ElementAddr);
+      }
+    }
+  }
+
+  CodeGenFunction &CGF;
+  const uint64_t CharWidth;
+  llvm::SmallVector<Data> Stack;
+  llvm::SmallVector<BitInterval> OccuppiedIntervals;
+};
+
+} // namespace
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -5159,6 +5496,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     return RValue::get(Ptr);
   }
+  case Builtin::BI__builtin_clear_padding: {
+    Address Src = EmitPointerWithAlignment(E->getArg(0));
+    auto PointeeTy = E->getArg(0)->getType()->getPointeeType();
+    PaddingClearer clearer{*this};
+    clearer.run(Src, PointeeTy);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_sub:
   case Builtin::BI__sync_fetch_and_or:

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0609c05bbafe1..2cf8221d933fd 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3130,7 +3130,71 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     return BuiltinIsWithinLifetime(*this, TheCall);
   case Builtin::BI__builtin_trivially_relocate:
     return BuiltinTriviallyRelocate(*this, TheCall);
+  case Builtin::BI__builtin_clear_padding: {
+    if (checkArgCount(TheCall, 1))
+      return ExprError();
+
+    const Expr *PtrArg = TheCall->getArg(0);
+    const QualType PtrArgType = PtrArg->getType();
+    if (!PtrArgType->isPointerType()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+          << PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
+          << "pointer";
+      return ExprError();
+    }
+    QualType PointeeType = PtrArgType->getPointeeType();
+    if (PointeeType.isConstQualified()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+          << TheCall->getSourceRange() << 4 /*ConstUnknown*/;
+      return ExprError();
+    }
+    if (RequireCompleteType(PtrArg->getBeginLoc(), PointeeType,
+                            diag::err_typecheck_decl_incomplete_type))
+      return ExprError();
+
+    // For non-trivially-copyable types, we try to match GCC's behaviour,
+    // i.e. __builtin_clear_padding(&var) is OK as long as var is a complete
+    // object: either a local variable or a function parameter passed by value.
+    auto IsAddrOfDeclExpr = [&]() {
+      const Expr *Inner = PtrArg->IgnoreParenNoopCasts(Context);
+      const auto *UnaryOp = dyn_cast<UnaryOperator>(Inner);
+      if (!UnaryOp || UnaryOp->getOpcode() != UO_AddrOf)
+        return false;
+
+      const Expr *Operand =
+          UnaryOp->getSubExpr()->IgnoreParenNoopCasts(Context);
+      const auto *DeclRef = dyn_cast<DeclRefExpr>(Operand);
+      if (!DeclRef)
+        return false;
 
+      const auto *VarDecl = dyn_cast<::clang::VarDecl>(DeclRef->getDecl());
+      if (!VarDecl || VarDecl->getType()->isReferenceType())
+        return false;
+
+      // Matching GCC's behaviour:
+      // __builtin_clear_padding((X*)&var) is fine if X is the type of var.
+      QualType VarQType = VarDecl->getType();
+      return PointeeType.getTypePtr() == VarQType.getTypePtr() ||
+             Context.hasSameUnqualifiedType(PointeeType, VarQType);
+    };
+
+    if (!PointeeType.isTriviallyCopyableType(Context) &&
+        !PointeeType->isAtomicType() // _Atomic is not copyable
+        && !IsAddrOfDeclExpr()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_clear_padding_needs_trivial_copy)
+          << PtrArg->getType() << PtrArg->getSourceRange();
+      return ExprError();
+    }
+
+    if (auto *Record = PointeeType->getAsRecordDecl();
+        Record && Record->hasFlexibleArrayMember()) {
+      Diag(PtrArg->getBeginLoc(), diag::err_clear_padding_no_flexible_array)
+          << PointeeType << PtrArg->getSourceRange();
+      return ExprError();
+    }
+
+    break;
+  }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:

diff  --git a/clang/test/CodeGen/builtin-clear-padding-codegen.c b/clang/test/CodeGen/builtin-clear-padding-codegen.c
new file mode 100644
index 0000000000000..9d5e06c79fa94
--- /dev/null
+++ b/clang/test/CodeGen/builtin-clear-padding-codegen.c
@@ -0,0 +1,993 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c11 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c11 -triple=x86_64-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=WINDOWS
+
+struct Empty {};
+
+// LINUX-LABEL: define dso_local void @testEmpty(
+// LINUX-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testEmpty(
+// WINDOWS-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testEmpty(struct Empty *e) {
+  // Empty struct is empty in C, no padding
+  __builtin_clear_padding(e);
+}
+
+
+// LINUX-LABEL: define dso_local void @testPrimitiveNoPadding(
+// LINUX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testPrimitiveNoPadding(
+// WINDOWS-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveNoPadding(int *i) {
+  // This should not clear any padding, since int has no padding.
+  __builtin_clear_padding(i);
+}
+
+
+// LINUX-LABEL: define dso_local void @testPrimitiveLongDouble(
+// LINUX-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testPrimitiveLongDouble(
+// WINDOWS-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveLongDouble(long double *ld) {
+  // padding [10, 15] on x86
+  __builtin_clear_padding(ld);
+}
+
+// LINUX-LABEL: define dso_local void @testBitInt(
+// LINUX-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testBitInt(
+// WINDOWS-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitInt(_BitInt(97) *bi) {
+  // Storage is widened to 128 bits; clear bits [97, 128).
+  __builtin_clear_padding(bi);
+}
+
+
+// LINUX-LABEL: define dso_local void @testPrimitiveComplexLongDouble(
+// LINUX-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 4
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 2
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testPrimitiveComplexLongDouble(
+// WINDOWS-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 4
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 2
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveComplexLongDouble(_Complex long double *c) {
+  // padding [10, 15] and [26, 31] on x86
+  __builtin_clear_padding(c);
+}
+
+union U1 {
+  int i;
+  char c;
+};
+
+// LINUX-LABEL: define dso_local void @testUnionDifferentLength(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testUnionDifferentLength(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testUnionDifferentLength(union U1 *u) {
+  // Object-representation bytes of a non-active member must be left alone, so no stores are expected.
+  __builtin_clear_padding(u);
+}
+
+struct S {
+  __attribute__((aligned(8))) char c1; // byte 0; bytes [1, 7] are tail padding
+};
+
+union U2 {
+  struct S s1; // largest member; its tail padding is what the test expects to be cleared
+  char c2;
+};
+
+// LINUX-LABEL: define dso_local void @testUnionTailPaddingOfLongestMember(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 4
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testUnionTailPaddingOfLongestMember(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 4
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testUnionTailPaddingOfLongestMember(union U2 *u) {
+  // The tail padding of the largest member (s1) is expected to be cleared:
+  // bytes [1, 7].
+  __builtin_clear_padding(u);
+}
+
+
+struct __attribute__((aligned(4))) Foo {
+  char a;             // byte 0, followed by one padding byte
+  _Alignas(2) char b; // byte 2, followed by one padding byte
+};
+
+struct __attribute__((aligned(4))) Bar {
+  char c;             // byte 0, followed by one padding byte
+  _Alignas(2) char d; // byte 2, followed by one padding byte
+};
+
+struct __attribute__((aligned(4))) Baz {
+  struct Foo foo; // bytes [0, 3]
+  char e;         // byte 4, followed by padding bytes [5, 7]
+  struct Bar bar; // bytes [8, 11]
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// LINUX-LABEL: define dso_local void @testStructPaddingInBetweenMembers(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testStructPaddingInBetweenMembers(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructPaddingInBetweenMembers(struct Baz *baz) {
+  // Expected to zero every padding byte between the members: offsets 1, 3, 5-7, 9 and 11.
+  __builtin_clear_padding(baz);
+}
+
+// LINUX-LABEL: define dso_local void @testStructVolatile(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testStructVolatile(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructVolatile(volatile struct Baz *baz) {
+  // Same Baz padding via a volatile-qualified pointer; the checks expect plain (non-volatile) stores.
+  __builtin_clear_padding(baz);
+}
+
+
+
+
+struct S3 {
+  long double l; // value bytes [0, 9], padding [10, 15]
+  _Bool b;       // byte 16, tail padding [17, 31]
+};
+
+// LINUX-LABEL: define dso_local void @testStructWithLongDouble(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 2
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 4
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 2
+// LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 8
+// LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 2
+// LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 4
+// LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 2
+// LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testStructWithLongDouble(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 2
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 4
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 2
+// WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 8
+// WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 2
+// WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 4
+// WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 2
+// WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructWithLongDouble(struct S3 *s) {
+  // "l": value bytes [0-9], PAD [10-15]; "b": byte 16, PAD [17-31]
+  __builtin_clear_padding(s);
+}
+
+struct S11 {
+  // will usually occupy 2 bytes:
+  unsigned char b1 : 3; // first 3 bits of the 1st byte
+  unsigned char b2 : 2; // next 2 bits of the 1st byte; its remaining 3 bits are unused
+  unsigned char b3 : 6; // 6 bits for b3 - does not fit into the 1st byte, so it starts the 2nd
+  unsigned char b4 : 2; // 2 bits for b4 - the next (and final) bits of the 2nd byte
+};
+
+// LINUX-LABEL: define dso_local void @testBitFields(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testBitFields(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitFields(struct S11 *s) {
+  // "b1" [0-2], "b2" [3-4], PAD [5-7], "b3" [8-13], "b4" [14-15]
+  // Clearing bits 5-7 means ANDing the first byte with 0b00011111 (31).
+  __builtin_clear_padding(s);
+}
+
+
+// LINUX-LABEL: define dso_local void @testArrayNoPadding(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrayNoPadding(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// WINDOWS-NEXT:    ret void
+//
+void testArrayNoPadding(void) {
+  int i[4];
+  // An array of int has no padding, so no stores are expected.
+  __builtin_clear_padding(&i);
+}
+
+// LINUX-LABEL: define dso_local void @testArrayLongDouble(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 4
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 4
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrayLongDouble(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 4
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 4
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrayLongDouble() {
+  // ld[0]: value bytes [0-9], PAD [10-15]
+  // ld[1]: value bytes [16-25], PAD [26-31]
+  long double ld[2];
+  __builtin_clear_padding(&ld);
+}
+
+// LINUX-LABEL: define dso_local void @testArrayOfStruct(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_LOCAL:%.*]]], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrayOfStruct(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_LOCAL:%.*]]], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrayOfStruct(void) {
+  struct S_local {
+    int i1;
+    char c1;
+    int i2;
+    char c2;
+  };
+
+  // s[0].i1 [0-3], s[0].c1 [4], PAD [5-7],
+  // s[0].i2 [8-11], s[0].c2 [12], PAD [13-15],
+  // s[1].i1 [16-19], s[1].c1 [20], PAD [21-23],
+  // s[1].i2 [24-27], s[1].c2 [28], PAD [29-31]
+
+  struct S_local s[2];
+  __builtin_clear_padding(&s);
+}
+
+struct ArrOfStructsWithPadding {
+  struct Bar bars[2]; // bars[0] in bytes [0, 3], bars[1] in bytes [4, 7]
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// LINUX-LABEL: define dso_local void @testArrOfStructsWithPadding(
+// LINUX-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testArrOfStructsWithPadding(
+// WINDOWS-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrOfStructsWithPadding(struct ArrOfStructsWithPadding *arr) {
+  __builtin_clear_padding(arr); // expected to zero bytes 1, 3, 5 and 7
+}
+
+// LINUX-LABEL: define dso_local void @testAtomic(
+// LINUX-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testAtomic(
+// WINDOWS-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAtomic(_Atomic(struct Bar)* bar) {
+  __builtin_clear_padding(bar); // expected to zero bytes 1 and 3, the padding inside struct Bar
+}
+
+typedef float Float3Vec __attribute__((ext_vector_type(3))); // checks expect bytes [12, 15] to be cleared
+typedef long double LongDouble3Vec __attribute__((ext_vector_type(3))); // checks start clearing at byte 30
+
+// LINUX-LABEL: define dso_local void @testAttributedType(
+// LINUX-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 2
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testAttributedType(
+// WINDOWS-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 2
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAttributedType(Float3Vec* v) {
+  __builtin_clear_padding(v); // expected to zero bytes [12, 15]
+}
+
+// LINUX-LABEL: define dso_local void @testAttributedLongDoubleType(
+// LINUX-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 32
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 32
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 33
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 34
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 35
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 4
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 2
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 40
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 8
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 41
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 42
+// LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 2
+// LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 43
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 44
+// LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 4
+// LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 2
+// LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48
+// LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 16
+// LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 49
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 50
+// LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 2
+// LINUX-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[TMP0]], i32 51
+// LINUX-NEXT:    store i8 0, ptr [[TMP22]], align 1
+// LINUX-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[TMP0]], i32 52
+// LINUX-NEXT:    store i8 0, ptr [[TMP23]], align 4
+// LINUX-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP0]], i32 53
+// LINUX-NEXT:    store i8 0, ptr [[TMP24]], align 1
+// LINUX-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[TMP0]], i32 54
+// LINUX-NEXT:    store i8 0, ptr [[TMP25]], align 2
+// LINUX-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP0]], i32 55
+// LINUX-NEXT:    store i8 0, ptr [[TMP26]], align 1
+// LINUX-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP0]], i32 56
+// LINUX-NEXT:    store i8 0, ptr [[TMP27]], align 8
+// LINUX-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[TMP0]], i32 57
+// LINUX-NEXT:    store i8 0, ptr [[TMP28]], align 1
+// LINUX-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP0]], i32 58
+// LINUX-NEXT:    store i8 0, ptr [[TMP29]], align 2
+// LINUX-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[TMP0]], i32 59
+// LINUX-NEXT:    store i8 0, ptr [[TMP30]], align 1
+// LINUX-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP0]], i32 60
+// LINUX-NEXT:    store i8 0, ptr [[TMP31]], align 4
+// LINUX-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[TMP0]], i32 61
+// LINUX-NEXT:    store i8 0, ptr [[TMP32]], align 1
+// LINUX-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP0]], i32 62
+// LINUX-NEXT:    store i8 0, ptr [[TMP33]], align 2
+// LINUX-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP0]], i32 63
+// LINUX-NEXT:    store i8 0, ptr [[TMP34]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @testAttributedLongDoubleType(
+// WINDOWS-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 32
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 32
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 33
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 34
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 35
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 4
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 2
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 40
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 8
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 41
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 42
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 2
+// WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 43
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 44
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 4
+// WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 2
+// WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 16
+// WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 49
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 50
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 2
+// WINDOWS-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[TMP0]], i32 51
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP22]], align 1
+// WINDOWS-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[TMP0]], i32 52
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP23]], align 4
+// WINDOWS-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP0]], i32 53
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP24]], align 1
+// WINDOWS-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[TMP0]], i32 54
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP25]], align 2
+// WINDOWS-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP0]], i32 55
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP26]], align 1
+// WINDOWS-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP0]], i32 56
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP27]], align 8
+// WINDOWS-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[TMP0]], i32 57
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP28]], align 1
+// WINDOWS-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP0]], i32 58
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP29]], align 2
+// WINDOWS-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[TMP0]], i32 59
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP30]], align 1
+// WINDOWS-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP0]], i32 60
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP31]], align 4
+// WINDOWS-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[TMP0]], i32 61
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP32]], align 1
+// WINDOWS-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP0]], i32 62
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP33]], align 2
+// WINDOWS-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP0]], i32 63
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP34]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAttributedLongDoubleType(LongDouble3Vec *v) {
+  // long double elements are packed back-to-back in [0, 29]; padding is [30, 63] on x86.
+  __builtin_clear_padding(v);
+}

diff  --git a/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
new file mode 100644
index 0000000000000..05ea1265e75db
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -0,0 +1,1521 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-windows-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=WINDOWS
+
+
+struct Empty {};  // no members; the checks below treat its single byte as padding
+
+// LINUX-LABEL: define dso_local void @_Z9testEmptyP5Empty(
+// LINUX-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z9testEmptyP5Empty(
+// WINDOWS-SAME: ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[E_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[E]], ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testEmpty(Empty *e) {
+  // Empty has no members, so the one byte it occupies is padding and gets zeroed.
+  __builtin_clear_padding(e);
+}
+
+
+// LINUX-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
+// LINUX-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z22testPrimitiveNoPaddingPi(
+// WINDOWS-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveNoPadding(int *i) {
+  // int has no padding, so no stores should be emitted at all.
+  __builtin_clear_padding(i);
+}
+
+
+// LINUX-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
+// LINUX-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testPrimitiveLongDoublePe(
+// WINDOWS-SAME: ptr noundef [[LD:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[LD]], ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LD_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveLongDouble(long double *ld) {
+  // The value occupies bytes [0, 9]; padding is [10, 15] on x86.
+  __builtin_clear_padding(ld);
+}
+
+// LINUX-LABEL: define dso_local void @_Z10testBitIntPDB97_(
+// LINUX-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z10testBitIntPDB97_(
+// WINDOWS-SAME: ptr noundef [[BI:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BI_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BI]], ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BI_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 1
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitInt(_BitInt(97) *bi) {
+  // Storage is widened to 128 bits; clear bits [97, 128).
+  __builtin_clear_padding(bi);  // byte 12 keeps only bit 96 (and with 1); bytes [13, 15] are zeroed
+}
+
+
+// LINUX-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
+// LINUX-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 4
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 2
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z30testPrimitiveComplexLongDoublePCe(
+// WINDOWS-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 4
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 2
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPrimitiveComplexLongDouble(_Complex long double *c) {
+  // Each of the two long double parts has tail padding: [10, 15] and [26, 31] on x86.
+  __builtin_clear_padding(c);
+}
+
+union U1 {
+  int i;   // longest member: spans the whole union
+  char c;  // shares byte 0 with i
+};
+
+// LINUX-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testUnionDifferentLengthP2U1(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    ret void
+//
+void testUnionDifferentLength(U1 *u) {
+  // Bytes past c belong to i's object representation, so nothing may be cleared here.
+  __builtin_clear_padding(u);
+}
+
+
+struct S {
+  alignas(8) char c1;  // value at [0]; the 8-byte alignment leaves [1, 7] as tail padding
+};
+
+union U2 {
+  S s1;     // longest member; its tail padding [1, 7] is padding of the union too
+  char c2;  // overlaps s1.c1 at [0]
+};
+
+// LINUX-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
+// LINUX-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 4
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z35testUnionTailPaddingOfLongestMemberP2U2(
+// WINDOWS-SAME: ptr noundef [[U:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[U_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[U]], ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[U_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 4
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testUnionTailPaddingOfLongestMember(U2 *u) {
+  // This should clear the tail padding of the longest member (s1):
+  // [1 - 7]
+  __builtin_clear_padding(u);
+}
+
+
+struct alignas(4) Foo {
+  char a;             // [0], followed by one padding byte
+  alignas(2) char b;  // [2], followed by one padding byte
+};
+
+struct alignas(4) Bar {
+  char c;             // [0], followed by one padding byte
+  alignas(2) char d;  // [2], followed by one padding byte
+};
+
+struct alignas(4) Baz : Foo {  // Foo base subobject occupies [0, 3]
+  char e;  // [4], followed by padding [5, 7]
+  Bar f;   // [8, 11]
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// LINUX-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z33testStructPaddingInBetweenMembersP3Baz(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructPaddingInBetweenMembers(Baz *baz) {
+  // This should clear every padding byte between the members: 1, 3, [5 - 7], 9 and 11.
+  __builtin_clear_padding(baz);
+}
+
+// LINUX-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
+// LINUX-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testStructVolatilePV3Baz(
+// WINDOWS-SAME: ptr noundef [[BAZ:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAZ_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAZ]], ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAZ_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 9
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructVolatile(volatile Baz *baz) {
+  // A volatile-qualified pointee clears the same Baz padding bytes: 1, 3, [5 - 7], 9 and 11.
+  __builtin_clear_padding(baz);
+}
+
+class S1 {
+  int x;   // [0-3]
+  char c;  // [4]; the bytes after it are S1's tail padding
+};
+
+class S2 {
+  [[no_unique_address]] S1 s1;  // lets the next member reuse s1's tail padding
+  bool b;                       // ends up at [5], leaving only [6-7] as padding
+};
+
+// LINUX-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z19testNoUniqueAddressP2S2(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testNoUniqueAddress(S2 *s) {
+  // "x" [0-3], "c" [4], "b" [5] (inside the tail padding of s1), PAD [6-7]
+  __builtin_clear_padding(s);
+}
+
+struct S3 {
+  long double l;  // value [0-9], padding [10-15] on x86
+  bool b;         // [16], followed by tail padding [17-31]
+};
+
+// LINUX-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 2
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 4
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 2
+// LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 8
+// LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 2
+// LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 4
+// LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 2
+// LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testStructWithLongDoubleP2S3(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 4
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 17
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 18
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 2
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 19
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 20
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 4
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 2
+// WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 1
+// WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 24
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 8
+// WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 25
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 1
+// WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 2
+// WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 1
+// WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 4
+// WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 1
+// WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 2
+// WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructWithLongDouble(S3 *s) { // one zero byte-store is expected per padding byte
+  // "l" [0-9], PAD [10-15], "b" [16], PAD [17-31]
+  __builtin_clear_padding(s);
+}
+
+struct B { // non-empty base of S4
+  int i; // laid out at [0-3] in S4
+};
+struct S4 : Empty, B { // the Empty base takes no bytes here: "i" from B starts at offset 0
+  bool b; // byte [4], followed by tail PAD [5-7]
+};
+
+// LINUX-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testStructWithEmptyBaseP2S4(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testStructWithEmptyBase(S4 *s) { // only the tail padding [5-7] is expected to be zeroed
+  // The Empty base adds no bytes: "i" [0-3], "b" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+struct B1 { // first base of S5
+  char c1; // byte [0] in S5
+};
+struct B2 { // second base of S5
+  alignas(4) char c2; // byte [4] in S5, with PAD [1-3] before it and PAD [5-7] after it
+};
+struct S5 : B1, B2 { // declares no members of its own; all padding comes from the base layout
+};
+
+// LINUX-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testPaddingBetweenBasesP2S5(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPaddingBetweenBases(S5 *s) { // padding both between the two bases and after the last one is zeroed
+  // "c1" [0], PAD [1-3], "c2" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+struct B3 { // first base of S6
+  char c1; // byte [0] in S6
+};
+
+struct B4 { // second base of S6
+  char c2; // byte [1] in S6
+};
+
+struct S6 : B3, B4 { // the bases occupy bytes [0] and [1]
+  alignas(4) char c3; // byte [4], with PAD [2-3] before it and tail PAD [5-7] after it
+};
+
+// LINUX-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testPaddingAfterLastBaseP2S6(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testPaddingAfterLastBase(S6 *s) { // padding between the last base and the first member is zeroed too
+  // "c1" [0], "c2" [1], PAD [2-3], "c3" [4], PAD [5-7]
+  __builtin_clear_padding(s);
+}
+
+
+struct VirtualBase { // polymorphic base of S7; testVtable's layout note puts the vtable pointer at [0-7]
+  unsigned int x; // bytes [8-11] in S7
+  virtual int call() { return x; };
+  virtual ~VirtualBase() = default;
+};
+
+struct NonVirtualBase { // base of S7 without virtual functions
+  char y; // byte [12] in S7
+};
+
+struct S7 : VirtualBase, NonVirtualBase { // fixture for testVtable
+  virtual int call() override { return 5; }
+  bool z; // byte [13], followed by tail PAD [14-15]
+};
+
+// LINUX-LABEL: define dso_local void @_Z10testVtable2S7(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z10testVtable2S7(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testVtable(S7 s) { // only bytes 14 and 15 are zeroed; the vtable pointer bytes are not written
+  // "vtable ptr" [0-7], "x" [8-11], "y" [12], "z" [13], PAD [14-15]
+  __builtin_clear_padding(&s);
+}
+
+struct VirtualBase1 { // first polymorphic base of S8; its vtable pointer is at [0-7]
+  unsigned int x1; // bytes [8-11] in S8, followed by PAD [12-15]
+  virtual int call1() { return x1; };
+  virtual ~VirtualBase1() = default;
+};
+
+struct VirtualBase2 { // second polymorphic base of S8; its vtable pointer is at [16-23]
+  unsigned int x2; // bytes [24-27] in S8
+  virtual int call2() { return x2; };
+  virtual ~VirtualBase2() = default;
+};
+
+struct VirtualBase3 { // third polymorphic base of S8; its vtable pointer is at [32-39]
+  unsigned int x3; // bytes [40-43] in S8
+  virtual int call3() { return x3; };
+  virtual ~VirtualBase3() = default;
+};
+
+struct NonVirtualBase1 { // base of S8 without virtual functions
+  char y; // byte [28] in S8, followed by PAD [29-31]
+};
+struct S8 : VirtualBase1, VirtualBase2, NonVirtualBase1, VirtualBase3 { // fixture for testMultipleBasesVtable
+  virtual int call1() override { return 5; }
+  bool z; // byte [44], followed by tail PAD [45-47]
+};
+
+// LINUX-LABEL: define dso_local void @_Z23testMultipleBasesVtable2S8(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 4
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 2
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z23testMultipleBasesVtable2S8(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 4
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 45
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 46
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 2
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 47
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testMultipleBasesVtable(S8 s) { // three padding runs are zeroed; none of the vtable pointers are written
+  // "vtable ptr" [0-7], "x1" [8-11], PAD [12-15],
+  // "vtable ptr" [16-23], "x2" [24-27], "y" [28], PAD [29-31],
+  // "vtable ptr" [32-39], "x3" [40-43], "z" [44], PAD [45-47]
+  __builtin_clear_padding(&s);
+}
+
+struct VirtualChain1 { // root of the single-inheritance chain used by S9
+  unsigned int x1; // bytes [8-11] in S9, right after the vtable pointer [0-7]
+  virtual int call1() { return x1; };
+  virtual ~VirtualChain1() = default;
+};
+
+struct VirtualChain2 : VirtualChain1 { // middle link of the chain
+  unsigned int x2; // bytes [12-15] in S9
+  virtual int call2() { return x2; };
+  virtual ~VirtualChain2() = default;
+};
+
+struct VirtualChain3 : VirtualChain2 { // last link of the chain; direct base of S9
+  unsigned int x3; // bytes [16-19] in S9
+  virtual int call3() { return x3; };
+  virtual ~VirtualChain3() = default;
+};
+
+struct NonVirtualBase2 { // listed first among S9's bases but laid out after the VirtualChain3 members
+  char y; // byte [20] in S9
+};
+
+struct S9 : NonVirtualBase2, VirtualChain3 { // fixture for testVirtualChain
+  bool z; // byte [21], followed by tail PAD [22-23]
+};
+
+// LINUX-LABEL: define dso_local void @_Z16testVirtualChain2S9(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z16testVirtualChain2S9(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testVirtualChain(S9 s) {
+  // Only the padding after the bool "z" should be cleared.
+  // The bases are reordered in the layout: the VirtualChain3 members come first and "y" follows them.
+  // "vtable ptr" [0-7], "x1" [8-11], "x2" [12-15], "x3" [16-19],
+  // "y" [20], "z" [21], PAD [22-23]
+  __builtin_clear_padding(&s);
+}
+
+
+struct Base { // virtual base shared by D1 and D2
+  int x; // bytes [32-35] in S10, followed by PAD [36-39]
+};
+
+struct D1 : virtual Base { // first base of S10; its vtable pointer is at [0-7]
+  int d1;  // bytes [8-11] in S10
+  bool b1; // byte [12] in S10, followed by PAD [13-15]
+};
+struct D2 : virtual Base { // second base of S10; its vtable pointer is at [16-23]
+  int d2;  // bytes [24-27] in S10
+  bool b2; // byte [28] in S10
+};
+
+struct S10 : D1, D2 { // fixture for testVirtualInheritance
+  bool s; // byte [29], directly after "b2", followed by PAD [30-31]
+};
+
+// LINUX-LABEL: define dso_local void @_Z22testVirtualInheritance3S10(
+// LINUX-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 2
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 4
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z22testVirtualInheritance3S10(
+// WINDOWS-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 2
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 36
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 4
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 37
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 38
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 39
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testVirtualInheritance(S10 s) {
+  // Note: the derived member "s" is placed before the virtual base's "x".
+  // "vtable ptr" [0-7], "d1" [8-11], "b1" [12], PAD [13-15],
+  // "vtable ptr" [16-23], "d2" [24-27], "b2" [28], "s" [29], PAD [30-31],
+  // "x" [32-35], PAD [36-39]
+  __builtin_clear_padding(&s);
+}
+
+struct S11 {
+  // will usually occupy 2 bytes:
+  unsigned char b1 : 3; // the first 3 bits (in the 1st byte) are b1
+  unsigned char b2 : 2; // the next 2 bits (in the 1st byte); the remaining bits of the 1st byte are unused
+  unsigned char b3 : 6; // 6 bits for b3 - does not fit into the 1st byte => starts the 2nd byte
+  unsigned char b4 : 2; // 2 bits for b4 - the next (and final) bits in the 2nd byte
+};
+
+// LINUX-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
+// LINUX-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// LINUX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// LINUX-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z13testBitFieldsP3S11(
+// WINDOWS-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
+// WINDOWS-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = and i8 [[TMP2]], 31
+// WINDOWS-NEXT:    store i8 [[TMP3]], ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testBitFields(S11 *s) {
+  // Bit offsets: "b1" [0-2], "b2" [3-4], PAD [5-7], "b3" [8-13], "b4" [14-15]
+  // To clear bits 5-7, the first byte is ANDed with 0b00011111 (31); no other byte is written.
+  __builtin_clear_padding(s);
+}
+
+
+// LINUX-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testArrayNoPaddingv(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[I:%.*]] = alloca [4 x i32], align 16
+// WINDOWS-NEXT:    ret void
+//
+void testArrayNoPadding() {
+  int i[4];
+  // There is no padding in the array, so the expected IR contains no stores at all.
+  __builtin_clear_padding(&i);
+}
+
+// LINUX-LABEL: define dso_local void @_Z19testArrayLongDoublev(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 4
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 4
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z19testArrayLongDoublev(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[LD:%.*]] = alloca [2 x x86_fp80], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[LD]], i32 10
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 2
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[LD]], i32 11
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[LD]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 4
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[LD]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[LD]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[LD]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[LD]], i32 26
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 2
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[LD]], i32 27
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 1
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[LD]], i32 28
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 4
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[LD]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[LD]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[LD]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrayLongDouble() {
+  // ld[0]: value [0-9],   PAD [10-15]
+  // ld[1]: value [16-25], PAD [26-31]
+  long double ld[2];
+  __builtin_clear_padding(&ld);
+}
+
+// LINUX-LABEL: define dso_local void @_Z17testArrayOfStructv(
+// LINUX-SAME: ) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_0:%.*]]], align 16
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z17testArrayOfStructv(
+// WINDOWS-SAME: ) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S:%.*]] = alloca [2 x [[STRUCT_S_0:%.*]]], align 16
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[S]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[S]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 2
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[S]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 1
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[S]], i32 21
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[S]], i32 22
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 2
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[S]], i32 23
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[S]], i32 29
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 1
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[S]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 2
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[S]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrayOfStruct() {
+  struct S { // local type: each int/char pair is followed by 3 padding bytes
+    int i1;
+    char c1;
+    int i2;
+    char c2;
+  };
+
+  // s[0].i1 [0-3], s[0].c1 [4], PAD [5-7],
+  // s[0].i2 [8-11], s[0].c2 [12], PAD [13-15],
+  // s[1].i1 [16-19], s[1].c1 [20], PAD [21-23],
+  // s[1].i2 [24-27], s[1].c2 [28], PAD [29-31]
+
+  S s[2];
+  __builtin_clear_padding(&s);
+}
+
+struct ArrOfStructsWithPadding {
+  Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// LINUX-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
+// LINUX-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(
+// WINDOWS-SAME: ptr noundef [[ARR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[ARR]], ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
+  __builtin_clear_padding(arr);
+}
+
+template <class T>
+struct S12 {
+  T t;
+  char c;
+};
+
+// LINUX-LABEL: define dso_local void @_Z18testTemplateStructP3S12IiE(
+// LINUX-SAME: ptr noundef [[S12:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[S12_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[S12]], ptr [[S12_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S12_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testTemplateStructP3S12IiE(
+// WINDOWS-SAME: ptr noundef [[S12:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[S12_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[S12]], ptr [[S12_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[S12_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 2
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testTemplateStruct(S12<int>* s12) {
+  __builtin_clear_padding(s12);
+}
+
+// LINUX-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3Bar(
+// LINUX-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z10testAtomicPU7_Atomic3Bar(
+// WINDOWS-SAME: ptr noundef [[BAR:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[BAR]], ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 1
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 1
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 3
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAtomic(_Atomic(Bar)* bar) {
+  __builtin_clear_padding(bar);
+}
+
+
+struct NonTriviallyCopyable {
+  int i;
+  char c;
+
+  NonTriviallyCopyable(){}
+  NonTriviallyCopyable(const NonTriviallyCopyable&) {}
+  ~NonTriviallyCopyable() {}
+};
+
+// LINUX-LABEL: define dso_local void @_Z24testNonTriviallyCopyable20NonTriviallyCopyable(
+// LINUX-SAME: ptr noundef [[NTC:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[NTC_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[NTC]], ptr [[NTC_INDIRECT_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
+// LINUX-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z24testNonTriviallyCopyable20NonTriviallyCopyable(
+// WINDOWS-SAME: ptr noundef [[NTC:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[NTC_INDIRECT_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[NTC]], ptr [[NTC_INDIRECT_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[NTC]], i32 5
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP0]], align 1
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[NTC]], i32 6
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NTC]], i32 7
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testNonTriviallyCopyable(NonTriviallyCopyable ntc) {
+  __builtin_clear_padding(&ntc);
+}
+
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+typedef long double LongDouble3Vec __attribute__((ext_vector_type(3)));
+
+// LINUX-LABEL: define dso_local void @_Z18testAttributedTypePDv3_f(
+// LINUX-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 4
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 2
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z18testAttributedTypePDv3_f(
+// WINDOWS-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 12
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 4
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 13
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 14
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 2
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 15
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAttributedType(Float3Vec* v) {
+  __builtin_clear_padding(v);
+}
+
+// LINUX-LABEL: define dso_local void @_Z28testAttributedLongDoubleTypePDv3_e(
+// LINUX-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// LINUX-NEXT:  [[ENTRY:.*:]]
+// LINUX-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// LINUX-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// LINUX-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// LINUX-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// LINUX-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// LINUX-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// LINUX-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 32
+// LINUX-NEXT:    store i8 0, ptr [[TMP3]], align 32
+// LINUX-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 33
+// LINUX-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// LINUX-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 34
+// LINUX-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// LINUX-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 35
+// LINUX-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// LINUX-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// LINUX-NEXT:    store i8 0, ptr [[TMP7]], align 4
+// LINUX-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// LINUX-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// LINUX-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// LINUX-NEXT:    store i8 0, ptr [[TMP9]], align 2
+// LINUX-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
+// LINUX-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// LINUX-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 40
+// LINUX-NEXT:    store i8 0, ptr [[TMP11]], align 8
+// LINUX-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 41
+// LINUX-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// LINUX-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 42
+// LINUX-NEXT:    store i8 0, ptr [[TMP13]], align 2
+// LINUX-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 43
+// LINUX-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// LINUX-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 44
+// LINUX-NEXT:    store i8 0, ptr [[TMP15]], align 4
+// LINUX-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// LINUX-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// LINUX-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// LINUX-NEXT:    store i8 0, ptr [[TMP17]], align 2
+// LINUX-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
+// LINUX-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// LINUX-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48
+// LINUX-NEXT:    store i8 0, ptr [[TMP19]], align 16
+// LINUX-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 49
+// LINUX-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// LINUX-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 50
+// LINUX-NEXT:    store i8 0, ptr [[TMP21]], align 2
+// LINUX-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[TMP0]], i32 51
+// LINUX-NEXT:    store i8 0, ptr [[TMP22]], align 1
+// LINUX-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[TMP0]], i32 52
+// LINUX-NEXT:    store i8 0, ptr [[TMP23]], align 4
+// LINUX-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP0]], i32 53
+// LINUX-NEXT:    store i8 0, ptr [[TMP24]], align 1
+// LINUX-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[TMP0]], i32 54
+// LINUX-NEXT:    store i8 0, ptr [[TMP25]], align 2
+// LINUX-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP0]], i32 55
+// LINUX-NEXT:    store i8 0, ptr [[TMP26]], align 1
+// LINUX-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP0]], i32 56
+// LINUX-NEXT:    store i8 0, ptr [[TMP27]], align 8
+// LINUX-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[TMP0]], i32 57
+// LINUX-NEXT:    store i8 0, ptr [[TMP28]], align 1
+// LINUX-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP0]], i32 58
+// LINUX-NEXT:    store i8 0, ptr [[TMP29]], align 2
+// LINUX-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[TMP0]], i32 59
+// LINUX-NEXT:    store i8 0, ptr [[TMP30]], align 1
+// LINUX-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP0]], i32 60
+// LINUX-NEXT:    store i8 0, ptr [[TMP31]], align 4
+// LINUX-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[TMP0]], i32 61
+// LINUX-NEXT:    store i8 0, ptr [[TMP32]], align 1
+// LINUX-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP0]], i32 62
+// LINUX-NEXT:    store i8 0, ptr [[TMP33]], align 2
+// LINUX-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP0]], i32 63
+// LINUX-NEXT:    store i8 0, ptr [[TMP34]], align 1
+// LINUX-NEXT:    ret void
+//
+// WINDOWS-LABEL: define dso_local void @_Z28testAttributedLongDoubleTypePDv3_e(
+// WINDOWS-SAME: ptr noundef [[V:%.*]]) #[[ATTR0]] {
+// WINDOWS-NEXT:  [[ENTRY:.*:]]
+// WINDOWS-NEXT:    [[V_ADDR:%.*]] = alloca ptr, align 8
+// WINDOWS-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V_ADDR]], align 8
+// WINDOWS-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 30
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP1]], align 2
+// WINDOWS-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 31
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP2]], align 1
+// WINDOWS-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i32 32
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP3]], align 32
+// WINDOWS-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i32 33
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP4]], align 1
+// WINDOWS-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i32 34
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP5]], align 2
+// WINDOWS-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i32 35
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP6]], align 1
+// WINDOWS-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 36
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP7]], align 4
+// WINDOWS-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 37
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP8]], align 1
+// WINDOWS-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP0]], i32 38
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP9]], align 2
+// WINDOWS-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP0]], i32 39
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP10]], align 1
+// WINDOWS-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[TMP0]], i32 40
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP11]], align 8
+// WINDOWS-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i32 41
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP12]], align 1
+// WINDOWS-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP0]], i32 42
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP13]], align 2
+// WINDOWS-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i32 43
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP14]], align 1
+// WINDOWS-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TMP0]], i32 44
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP15]], align 4
+// WINDOWS-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP0]], i32 45
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP16]], align 1
+// WINDOWS-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP0]], i32 46
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP17]], align 2
+// WINDOWS-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP0]], i32 47
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP18]], align 1
+// WINDOWS-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP19]], align 16
+// WINDOWS-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP0]], i32 49
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP20]], align 1
+// WINDOWS-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP0]], i32 50
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP21]], align 2
+// WINDOWS-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[TMP0]], i32 51
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP22]], align 1
+// WINDOWS-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[TMP0]], i32 52
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP23]], align 4
+// WINDOWS-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP0]], i32 53
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP24]], align 1
+// WINDOWS-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[TMP0]], i32 54
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP25]], align 2
+// WINDOWS-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP0]], i32 55
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP26]], align 1
+// WINDOWS-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP0]], i32 56
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP27]], align 8
+// WINDOWS-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[TMP0]], i32 57
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP28]], align 1
+// WINDOWS-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP0]], i32 58
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP29]], align 2
+// WINDOWS-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[TMP0]], i32 59
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP30]], align 1
+// WINDOWS-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP0]], i32 60
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP31]], align 4
+// WINDOWS-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[TMP0]], i32 61
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP32]], align 1
+// WINDOWS-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP0]], i32 62
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP33]], align 2
+// WINDOWS-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP0]], i32 63
+// WINDOWS-NEXT:    store i8 0, ptr [[TMP34]], align 1
+// WINDOWS-NEXT:    ret void
+//
+void testAttributedLongDoubleType(LongDouble3Vec *v) {
+  // The three x86 long double elements are packed at [0-9], [10-19], [20-29]; PAD [30-63].
+  __builtin_clear_padding(v);
+}

diff  --git a/clang/test/Sema/builtin-clear-padding.c b/clang/test/Sema/builtin-clear-padding.c
new file mode 100644
index 0000000000000..c0d83a522ac3e
--- /dev/null
+++ b/clang/test/Sema/builtin-clear-padding.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct Foo {};
+
+void test(int a, struct Foo b, int *d, struct Foo *e, const struct Foo *f) {
+  __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
+  __builtin_clear_padding(b); // expected-error {{passing 'struct Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('struct Foo' vs pointer)}}
+  __builtin_clear_padding(d); // This should not error.
+  __builtin_clear_padding(e); // This should not error.
+  __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
+}
+
+struct Incomplete; // expected-note {{forward declaration of 'struct Incomplete'}}
+
+void testIncomplete(void* v, struct Incomplete *i) {
+  __builtin_clear_padding(v); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(i); // expected-error {{variable has incomplete type 'struct Incomplete'}}
+}
+
+void testNumArgs(int* i) {
+  __builtin_clear_padding(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+  __builtin_clear_padding(i); // This should not error.
+  __builtin_clear_padding(i, i); // expected-error {{too many arguments to function call, expected 1, have 2}}
+  __builtin_clear_padding(i, i, i); // expected-error {{too many arguments to function call, expected 1, have 3}}
+  __builtin_clear_padding(i, i, i, i); // expected-error {{too many arguments to function call, expected 1, have 4}}
+}
+
+void testFunctionPointer(void(*f)()) {
+  __builtin_clear_padding(f); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('void (*)()' invalid)}}
+}
+
+struct WithVLA {
+  int i;
+  char c[];
+};
+
+struct WithVLA2 {
+  int i2;
+  struct WithVLA w;
+};
+
+struct WithVLA3 {
+  struct WithVLA2 w2;
+};
+
+void testVLA(struct WithVLA* w1, struct WithVLA2* w2, struct WithVLA3* w3) {
+  __builtin_clear_padding(w1); // expected-error {{'struct WithVLA' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w2); // expected-error {{'struct WithVLA2' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w3); // expected-error {{'struct WithVLA3' has flexible array member, which is unsupported by __builtin_clear_padding}}
+}
+

diff  --git a/clang/test/SemaCXX/builtin-clear-padding.cpp b/clang/test/SemaCXX/builtin-clear-padding.cpp
new file mode 100644
index 0000000000000..e03475a3af2ad
--- /dev/null
+++ b/clang/test/SemaCXX/builtin-clear-padding.cpp
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+struct Foo {};
+
+void test(int a, Foo b, int *d, Foo *e, const Foo *f) {
+  __builtin_clear_padding(a); // expected-error {{passing 'int' to parameter of incompatible type pointer: type mismatch at 1st parameter ('int' vs pointer)}}
+  __builtin_clear_padding(b); // expected-error {{passing 'Foo' to parameter of incompatible type pointer: type mismatch at 1st parameter ('Foo' vs pointer)}}
+  __builtin_clear_padding(d); // This should not error.
+  __builtin_clear_padding(e); // This should not error.
+  __builtin_clear_padding(f); // expected-error {{read-only variable is not assignable}}
+}
+
+struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
+
+void testIncomplete(void* v, Incomplete *i) {
+  __builtin_clear_padding(v); // expected-error {{variable has incomplete type 'void'}}
+  __builtin_clear_padding(i); // expected-error {{variable has incomplete type 'Incomplete'}}
+}
+
+void testNumArgs(int* i) {
+  __builtin_clear_padding(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+  __builtin_clear_padding(i); // This should not error.
+  __builtin_clear_padding(i, i); // expected-error {{too many arguments to function call, expected 1, have 2}}
+  __builtin_clear_padding(i, i, i); // expected-error {{too many arguments to function call, expected 1, have 3}}
+  __builtin_clear_padding(i, i, i, i); // expected-error {{too many arguments to function call, expected 1, have 4}}
+}
+
+struct NonTriviallyCopyable {
+  NonTriviallyCopyable() {}
+  NonTriviallyCopyable(const NonTriviallyCopyable&){}
+};
+
+struct DerivedNonTriviallyCopyable : NonTriviallyCopyable {};
+
+void testNonTriviallyCopyable(NonTriviallyCopyable& ntc0, NonTriviallyCopyable ntc1, DerivedNonTriviallyCopyable& dntc0, DerivedNonTriviallyCopyable dntc1) {
+  NonTriviallyCopyable ntc2;
+  NonTriviallyCopyable& ntc3 = ntc0;
+  DerivedNonTriviallyCopyable dntc2;
+  DerivedNonTriviallyCopyable& dntc3 = dntc0;
+
+  __builtin_clear_padding(&ntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding(&ntc1);
+  __builtin_clear_padding(&ntc2);
+  __builtin_clear_padding(&ntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding(&dntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding(&dntc1);
+  __builtin_clear_padding(&dntc2);
+  __builtin_clear_padding(&dntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc1);
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc2);
+  __builtin_clear_padding((NonTriviallyCopyable*)&ntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc1);
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc2);
+  __builtin_clear_padding((DerivedNonTriviallyCopyable*)&dntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('DerivedNonTriviallyCopyable *' invalid)}}
+
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc0); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc1); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc2); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+  __builtin_clear_padding((NonTriviallyCopyable*)&dntc3); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('NonTriviallyCopyable *' invalid)}}
+}
+
+struct Bar {
+  Foo *foo;
+};
+
+void testMemberPointer(Foo* Bar::*mp) {
+  __builtin_clear_padding(mp); // expected-error {{passing 'Foo *Bar::*' to parameter of incompatible type pointer: type mismatch at 1st parameter ('Foo *Bar::*' vs pointer)}}
+}
+
+
+void testFunctionPointer(void(*f)()) {
+  __builtin_clear_padding(f); // expected-error {{argument to __builtin_clear_padding must be a pointer to a trivially-copyable type ('void (*)()' invalid)}}
+}
+
+struct WithVLA {
+  int i;
+  char c[];
+};
+
+struct WithVLA2 {
+  int i2;
+  WithVLA w;
+};
+
+struct WithVLA3 {
+  WithVLA2 w2;
+};
+
+void testVLA(WithVLA* w1, WithVLA2* w2, WithVLA3* w3) {
+  __builtin_clear_padding(w1); // expected-error {{'WithVLA' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w2); // expected-error {{'WithVLA2' has flexible array member, which is unsupported by __builtin_clear_padding}}
+  __builtin_clear_padding(w3); // expected-error {{'WithVLA3' has flexible array member, which is unsupported by __builtin_clear_padding}}
+}

diff  --git a/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
new file mode 100644
index 0000000000000..32bcd376cdcf9
--- /dev/null
+++ b/libcxx/test/libcxx/atomics/builtin_clear_padding.pass.cpp
@@ -0,0 +1,886 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: c++03
+// UNSUPPORTED: gcc
+// UNSUPPORTED: clang-19, clang-20, clang-21, clang-22, clang-23, apple-clang-17, apple-clang-21
+
+// ADDITIONAL_COMPILE_FLAGS: -Wno-deprecated-volatile -Wno-dynamic-class-memaccess
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) BasicWithPadding {
+  T x;
+  alignas(A2) T y;
+};
+
+template <size_t A1, size_t A2, size_t N, class T>
+struct alignas(A1) SpacedArrayMembers {
+  T x[N];
+  alignas(A2) char c;
+  T y[N];
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) PaddedPointerMembers {
+  T* x;
+  alignas(A2) T* y;
+};
+
+template <size_t A1, size_t A2, size_t A3, class T>
+struct alignas(A1) ThreeMembers {
+  T x;
+  alignas(A2) T y;
+  alignas(A3) T z;
+};
+
+template <class T>
+struct Normal {
+  T a;
+  T b;
+};
+
+template <class T>
+struct X {
+  T x;
+};
+
+template <class T>
+struct Z {
+  T z;
+};
+
+template <size_t A, class T>
+struct YZ : public Z<T> {
+  alignas(A) T y;
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) HasBase : public X<T>, public YZ<A2, T> {
+  T a;
+  alignas(A2) T b;
+};
+
+template <size_t A1, size_t A2, class T>
+void testAllStructsForType(T a, T b, T c, T d) {
+  // basic padding
+  {
+    using B = BasicWithPadding<A1, A2, T>;
+    B basic1;
+    memset(&basic1, 0, sizeof(B));
+    basic1.x = a;
+    basic1.y = b;
+    B basic2;
+    memset(&basic2, 42, sizeof(B));
+    basic2.x = a;
+    basic2.y = b;
+    assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+    __builtin_clear_padding(&basic2);
+    assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+  }
+
+  // spaced array
+  {
+    using A = SpacedArrayMembers<A1, A2, 2, T>;
+    A arr1;
+    memset(&arr1, 0, sizeof(A));
+    arr1.x[0] = a;
+    arr1.x[1] = b;
+    arr1.y[0] = c;
+    arr1.y[1] = d;
+    A arr2;
+    memset(&arr2, 42, sizeof(A));
+    arr2.x[0] = a;
+    arr2.x[1] = b;
+    arr2.y[0] = c;
+    arr2.y[1] = d;
+    arr2.c    = 0;
+    assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+    __builtin_clear_padding(&arr2);
+    assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+  }
+
+  // pointer members
+  {
+    using P = PaddedPointerMembers<A1, A2, T>;
+    P ptr1;
+    memset(&ptr1, 0, sizeof(P));
+    ptr1.x = &a;
+    ptr1.y = &b;
+    P ptr2;
+    memset(&ptr2, 42, sizeof(P));
+    ptr2.x = &a;
+    ptr2.y = &b;
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+    __builtin_clear_padding(&ptr2);
+    assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+  }
+
+  // three members
+  {
+    using Three = ThreeMembers<A1, A2, A2, T>;
+    Three three1;
+    memset(&three1, 0, sizeof(Three));
+    three1.x = a;
+    three1.y = b;
+    three1.z = c;
+    Three three2;
+    memset(&three2, 42, sizeof(Three));
+    three2.x = a;
+    three2.y = b;
+    three2.z = c;
+    __builtin_clear_padding(&three2);
+    assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+  }
+
+  // Normal struct no padding
+  {
+    using N = Normal<T>;
+    N normal1;
+    memset(&normal1, 0, sizeof(N));
+    normal1.a = a;
+    normal1.b = b;
+    N normal2;
+    memset(&normal2, 42, sizeof(N));
+    normal2.a = a;
+    normal2.b = b;
+    __builtin_clear_padding(&normal2);
+    assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+  }
+
+  // base class
+  {
+    using H = HasBase<A1, A2, T>;
+    H base1;
+    memset(&base1, 0, sizeof(H));
+    base1.a = a;
+    base1.b = b;
+    base1.x = c;
+    base1.y = d;
+    base1.z = a;
+    H base2;
+    memset(&base2, 42, sizeof(H));
+    base2.a = a;
+    base2.b = b;
+    base2.x = c;
+    base2.y = d;
+    base2.z = a;
+    assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+    __builtin_clear_padding(&base2);
+    assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+  }
+}
+
+void otherStructTests() { // heap-allocated variants of the BasicWithPadding<8, 4, char> check
+  // plain pointer: `expected` is filled with 0 before its members are set, `actual` with 42
+  {
+    using Padded     = BasicWithPadding<8, 4, char>;
+    Padded* expected = new Padded;
+    memset(expected, 0, sizeof(Padded));
+    expected->x = 1;
+    expected->y = 2;
+    Padded* actual = new Padded;
+    memset(actual, 42, sizeof(Padded));
+    actual->x = 1;
+    actual->y = 2;
+    assert(memcmp(expected, actual, sizeof(Padded)) != 0); // representations differ before clearing
+    __builtin_clear_padding(actual);
+    assert(memcmp(expected, actual, sizeof(Padded)) == 0); // and agree afterwards
+    delete actual;
+    delete expected;
+  }
+
+  // volatile pointer: the builtin is first invoked through a volatile-qualified pointer
+  {
+    using Padded   = BasicWithPadding<8, 4, char>;
+    auto* expected = new Padded;
+    memset(expected, 0, sizeof(Padded));
+    expected->x = 1;
+    expected->y = 2;
+    auto* actual = new Padded;
+    memset(actual, 42, sizeof(Padded));
+    actual->x = 1;
+    actual->y = 2;
+    assert(memcmp(expected, actual, sizeof(Padded)) != 0);
+    __builtin_clear_padding(const_cast<volatile Padded*>(actual));
+    __builtin_clear_padding(actual); // NOTE(review): plain call repeated on the same object, as in the original - confirm it is intended
+    assert(memcmp(expected, actual, sizeof(Padded)) == 0);
+    delete actual;
+    delete expected;
+  }
+}
+
+struct Foo { // two-int aggregate; structTests() passes it as the element type to testAllStructsForType
+  int x;
+  int y;
+};
+
+using Float4Vec __attribute__((ext_vector_type(4)))      = float;       // 4 x float
+using Bool9Vec __attribute__((ext_vector_type(9)))       = bool;        // 9 x bool
+using Float3Vec __attribute__((ext_vector_type(3)))      = float;       // 3 x float
+using LongDouble3Vec __attribute__((ext_vector_type(3))) = long double; // 3 x long double
+
+void primitiveTests() { // scalar types: the stored value must survive __builtin_clear_padding
+  // no padding
+  {
+    int i1 = 42, i2 = 42;
+    __builtin_clear_padding(&i1); // does nothing
+    assert(i1 == 42);
+    assert(memcmp(&i1, &i2, sizeof(int)) == 0);
+  }
+
+  // long double: d1 starts from 42-filled storage, d2 from zeroed storage
+  {
+    long double d1, d2;
+    memset(&d1, 42, sizeof(long double));
+    memset(&d2, 0, sizeof(long double));
+
+    d1 = 3.0L;
+    d2 = 3.0L;
+
+    __builtin_clear_padding(&d1);
+    assert(d1 == 3.0L);
+    assert(memcmp(&d1, &d2, sizeof(long double)) == 0);
+  }
+
+  // _BitInt: 97 value bits, so the storage cannot be made up of value bits only
+  {
+    using T = _BitInt(97);
+    T i1, i2;
+    memset(&i1, 42, sizeof(T));
+    memset(&i2, 0, sizeof(T));
+
+    i1 = 37;
+    i2 = 37;
+    __builtin_clear_padding(&i1);
+    assert(i1 == 37);
+    assert(memcmp(&i1, &i2, sizeof(T)) == 0);
+  }
+
+  // _Complex
+  {
+    _Complex long double c1, c2;
+
+    memset(&c1, 42, sizeof(_Complex long double));
+    memset(&c2, 0, sizeof(_Complex long double));
+    c1 = 3.0L;
+    c2 = 3.0L; // give the zero-based reference the same value (was a duplicated `c1 = 3.0L`)
+    __builtin_clear_padding(&c1);
+    //TODO: c1 is not yet compared against c2
+  }
+}
+
+void structTests() { // class-layout cases, then the templated suite and the heap variants
+  // [[no_unique_address]] member (S1) followed by a bool
+  {
+    struct S1 {
+      int x;
+      char c;
+    };
+
+    struct S2 {
+      [[no_unique_address]] S1 s;
+      bool b;
+    };
+
+    S2 s1, s2;
+    memset(&s1, 42, sizeof(S2));
+    memset(&s2, 0, sizeof(S2));
+
+    s1.s.x = 4;
+    s1.s.c = 'a';
+    s1.b   = true;
+    s2.s.x = 4;
+    s2.s.c = 'a';
+    s2.b   = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S2)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.s.x == 4);
+    assert(s1.s.c == 'a');
+    assert(s1.b == true);
+
+    assert(memcmp(&s1, &s2, sizeof(S2)) == 0);
+  }
+
+  // struct with a long double member followed by a bool
+  {
+    struct S {
+      long double l;
+      bool b;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.l = 3.0L;
+    s1.b = true;
+    s2.l = 3.0L;
+    s2.b = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.l == 3.0L);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // EBO (empty base optimisation): an empty base listed first, then a base holding an int
+  {
+    struct Empty {};
+    struct B {
+      int i;
+    };
+    struct S : Empty, B {
+      bool b;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.i = 4;
+    s1.b = true;
+    s2.i = 4;
+    s2.b = true;
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.i == 4);
+    assert(s1.b == true);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // padding between bases: B1 is a single char, B2 holds an alignas(4) char
+  {
+    struct B1 {
+      char c1;
+    };
+    struct B2 {
+      alignas(4) char c2;
+    };
+
+    struct S : B1, B2 {};
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.c1 = 'a';
+    s1.c2 = 'b';
+    s2.c1 = 'a';
+    s2.c2 = 'b';
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // padding after last base: two single-char bases followed by an alignas(4) member
+  {
+    struct B1 {
+      char c1;
+    };
+    struct B2 {
+      char c2;
+    };
+
+    struct S : B1, B2 {
+      alignas(4) char c3;
+    };
+
+    S s1, s2;
+    memset(&s1, 42, sizeof(S));
+    memset(&s2, 0, sizeof(S));
+
+    s1.c1 = 'a';
+    s1.c2 = 'b';
+    s1.c3 = 'c';
+    s2.c1 = 'a';
+    s2.c2 = 'b';
+    s2.c3 = 'c';
+
+    assert(memcmp(&s1, &s2, sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+    assert(s1.c1 == 'a');
+    assert(s1.c2 == 'b');
+    assert(s1.c3 == 'c');
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // vtable: s1/s2 are memcpy'd from objects placement-new'd over 0- and 42-filled buffers; s2 must still dispatch call()
+  {
+    struct VirtualBase {
+      unsigned int x;
+      virtual int call() { return x; };
+      virtual ~VirtualBase() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : VirtualBase, NonVirtualBase {
+      virtual int call() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x = 0xFFFFFFFF;
+    s2.x = 0xFFFFFFFF;
+    s1.y = 'a';
+    s2.y = 'a';
+    s1.z = true;
+    s2.z = true;
+    __builtin_clear_padding(&s2);
+    assert(s2.x == 0xFFFFFFFF);
+    assert(s2.y == 'a');
+    assert(s2.z == true);
+    assert(s2.call() == 5);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // multiple bases with vtable: three polymorphic bases and one non-polymorphic base in between
+  {
+    struct VirtualBase1 {
+      unsigned int x1;
+      virtual int call1() { return x1; };
+      virtual ~VirtualBase1() = default;
+    };
+
+    struct VirtualBase2 {
+      unsigned int x2;
+      virtual int call2() { return x2; };
+      virtual ~VirtualBase2() = default;
+    };
+
+    struct VirtualBase3 {
+      unsigned int x3;
+      virtual int call3() { return x3; };
+      virtual ~VirtualBase3() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : VirtualBase1, VirtualBase2, NonVirtualBase, VirtualBase3 {
+      virtual int call1() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x1 = 0xFFFFFFFF;
+    s2.x1 = 0xFFFFFFFF;
+    s1.x2 = 0xFAFAFAFA;
+    s2.x2 = 0xFAFAFAFA;
+    s1.x3 = 0xAAAAAAAA;
+    s2.x3 = 0xAAAAAAAA;
+    s1.y  = 'a';
+    s2.y  = 'a';
+    s1.z  = true;
+    s2.z  = true;
+    __builtin_clear_padding(&s2);
+    assert(s2.x1 == 0xFFFFFFFF);
+    assert(s2.x2 == 0xFAFAFAFA);
+    assert(s2.x3 == 0xAAAAAAAA);
+    assert(s2.y == 'a');
+    assert(s2.z == true);
+    assert(s2.call1() == 5);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // chain of bases with virtual functions: VirtualBase3 -> VirtualBase2 -> VirtualBase1, behind a non-polymorphic first base
+  {
+    struct VirtualBase1 {
+      unsigned int x1;
+      virtual int call1() { return x1; };
+      virtual ~VirtualBase1() = default;
+    };
+
+    struct VirtualBase2 : VirtualBase1 {
+      unsigned int x2;
+      virtual int call2() { return x2; };
+      virtual ~VirtualBase2() = default;
+    };
+
+    struct VirtualBase3 : VirtualBase2 {
+      unsigned int x3;
+      virtual int call3() { return x3; };
+      virtual ~VirtualBase3() = default;
+    };
+
+    struct NonVirtualBase {
+      char y;
+    };
+
+    struct S : NonVirtualBase, VirtualBase3 {
+      //virtual int call() override { return 5; }
+      bool z;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x1 = 0xFFFFFFFF;
+    s2.x1 = 0xFFFFFFFF;
+    s1.x2 = 0xFAFAFAFA;
+    s2.x2 = 0xFAFAFAFA;
+    s1.x3 = 0xAAAAAAAA;
+    s2.x3 = 0xAAAAAAAA;
+    s1.y  = 'a';
+    s2.y  = 'a';
+    s1.z  = true;
+    s2.z  = true;
+    __builtin_clear_padding(&s2);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // virtual inheritance: D1 and D2 both derive virtually from Base, S derives from both
+  {
+    struct Base {
+      int x;
+    };
+    struct D1 : virtual Base {
+      int d1;
+      bool b1;
+    };
+    struct D2 : virtual Base {
+      int d2;
+      bool b2;
+    };
+
+    struct S : D1, D2 {
+      bool s;
+    };
+
+    char buff1[sizeof(S)];
+    char buff2[sizeof(S)];
+    memset(buff1, 0, sizeof(S));
+    memset(buff2, 42, sizeof(S));
+    S* s1_data = new (&buff1) S;
+    S* s2_data = new (&buff2) S;
+
+    S s1;
+    S s2;
+
+    std::memcpy(&s1, s1_data, sizeof(S));
+    std::memcpy(&s2, s2_data, sizeof(S));
+
+    s1.x  = 0xFFFFFFFF;
+    s2.x  = 0xFFFFFFFF;
+    s1.d1 = 0xFAFAFAFA;
+    s2.d1 = 0xFAFAFAFA;
+    s1.d2 = 0xAAAAAAAA;
+    s2.d2 = 0xAAAAAAAA;
+    s1.b1 = true;
+    s2.b1 = true;
+    s1.b2 = true;
+    s2.b2 = true;
+    s1.s  = true;
+    s2.s  = true;
+    __builtin_clear_padding(&s2);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  // bit fields: the named bit-fields do not fill every bit of the storage they occupy
+  {
+    struct S {
+      // will usually occupy 2 bytes:
+      unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1
+      unsigned char b2 : 2; // next 2 bits (in 1st byte) are b2; the byte's last 3 bits are left unused
+      unsigned char b3 : 6; // 6 bits for b3 - doesn't fit into the 1st byte => starts a 2nd
+      unsigned char b4 : 2; // 2 bits for b4 - next (and final) bits in the 2nd byte
+    };
+
+    S s1, s2;
+    memset(&s1, 0, sizeof(S));
+    memset(&s2, 42, sizeof(S));
+
+    s1.b1 = 5;
+    s2.b1 = 5;
+    s1.b2 = 3;
+    s2.b2 = 3;
+    s1.b3 = 27;
+    s2.b3 = 27;
+    s1.b4 = 3;
+    s2.b4 = 3;
+    __builtin_clear_padding(&s2);
+    assert(memcmp(&s1, &s2, sizeof(S)) == 0);
+  }
+
+  testAllStructsForType<32, 16, char>(11, 22, 33, 44); // templated suite, instantiated for several element types
+  testAllStructsForType<64, 32, char>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, volatile char>(11, 22, 33, 44);
+  testAllStructsForType<64, 32, volatile char>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, int>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, int>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, volatile int>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, volatile int>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, double>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, double>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(28)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(28)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(60)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(60)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, _BitInt(64)>(0, 1, 2, 3);
+  testAllStructsForType<64, 32, _BitInt(64)>(4, 5, 6, 7);
+  testAllStructsForType<32, 16, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllStructsForType<64, 32, Foo>(Foo{1, 2}, Foo{3, 4}, Foo{1, 2}, Foo{3, 4});
+  testAllStructsForType<256, 128, Float4Vec>(0, 1, 2, 3);
+  testAllStructsForType<128, 128, Float4Vec>(4, 5, 6, 7);
+
+  otherStructTests(); // heap-allocated variants
+}
+
+void unionTests() { // unions: bytes used by any member must survive; tail padding of the largest member is cleared
+  // different length, do not clear object repr bits of non-active member
+  {
+    union u {
+      int i;
+      char c;
+    };
+
+    u u1, u2;
+    memset(&u1, 42, sizeof(u)); // both objects start from the same 42-filled bytes
+    memset(&u2, 42, sizeof(u));
+    u1.c = '4';
+    u2.c = '4';
+
+    __builtin_clear_padding(&u1); // should have no effect
+    assert(u1.c == '4');
+
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+  }
+
+  // tail padding of longest member
+  {
+    struct s {
+      alignas(8) char c1;
+    };
+
+    union u {
+      s s1;
+      char c2;
+    };
+
+    u u1, u2;
+    memset(&u1, 42, sizeof(u));
+    memset(&u2, 0, sizeof(u));
+
+    u1.s1.c1 = '4';
+    u2.s1.c1 = '4';
+
+    assert(memcmp(&u1, &u2, sizeof(u)) != 0);
+    __builtin_clear_padding(&u1);
+    assert(u1.s1.c1 == '4');
+    assert(memcmp(&u1, &u2, sizeof(u)) == 0);
+  }
+}
+
+void arrayTests() { // arrays: the builtin is given a pointer to the whole array and must handle every element
+  // no padding
+  {
+    int i1[2] = {1, 2};
+    int i2[2] = {1, 2};
+
+    __builtin_clear_padding(&i1);
+    assert(i1[0] == 1);
+    assert(i1[1] == 2);
+    assert(memcmp(&i1, &i2, 2 * sizeof(int)) == 0);
+  }
+
+  // long double
+  {
+    long double d1[2], d2[2];
+    memset(&d1, 42, 2 * sizeof(long double));
+    memset(&d2, 0, 2 * sizeof(long double));
+
+    d1[0] = 3.0L;
+    d1[1] = 4.0L;
+    d2[0] = 3.0L;
+    d2[1] = 4.0L;
+
+    __builtin_clear_padding(&d1);
+    assert(d1[0] == 3.0L);
+    assert(d1[1] == 4.0L); // check the cleared array (was d2[1], the untouched reference)
+    assert(memcmp(&d1, &d2, 2 * sizeof(long double)) == 0);
+  }
+
+  // struct
+  {
+    struct S {
+      int i1;
+      char c1;
+      int i2;
+      char c2;
+    };
+
+    S s1[2], s2[2];
+    memset(&s1, 42, 2 * sizeof(S));
+    memset(&s2, 0, 2 * sizeof(S));
+
+    s1[0].i1 = 1;
+    s1[0].c1 = 'a';
+    s1[0].i2 = 2;
+    s1[0].c2 = 'b';
+    s1[1].i1 = 3;
+    s1[1].c1 = 'c';
+    s1[1].i2 = 4;
+    s1[1].c2 = 'd';
+
+    s2[0].i1 = 1;
+    s2[0].c1 = 'a';
+    s2[0].i2 = 2;
+    s2[0].c2 = 'b';
+    s2[1].i1 = 3;
+    s2[1].c1 = 'c';
+    s2[1].i2 = 4;
+    s2[1].c2 = 'd';
+
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) != 0);
+    __builtin_clear_padding(&s1);
+
+    assert(s1[0].i1 == 1);
+    assert(s1[0].c1 == 'a');
+    assert(s1[0].i2 == 2);
+    assert(s1[0].c2 == 'b');
+    assert(s1[1].i1 == 3);
+    assert(s1[1].c1 == 'c');
+    assert(s1[1].i2 == 4);
+    assert(s1[1].c2 == 'd');
+    assert(memcmp(&s1, &s2, 2 * sizeof(S)) == 0);
+  }
+}
+
+void vectorTests() { // ext_vector_type vectors: `actual` starts 42-filled, `expected` starts zeroed
+  // bool vector (packed bits with potential tail padding to storage size)
+  {
+    Bool9Vec actual, expected;
+    memset(&actual, 42, sizeof(actual));
+    memset(&expected, 0, sizeof(expected));
+
+    actual[0]   = true;
+    expected[0] = true;
+    actual[1]   = false;
+    expected[1] = false;
+    actual[2]   = true;
+    expected[2] = true;
+    actual[3]   = false;
+    expected[3] = false;
+    actual[4]   = true;
+    expected[4] = true;
+    actual[5]   = true;
+    expected[5] = true;
+    actual[6]   = false;
+    expected[6] = false;
+    actual[7]   = true;
+    expected[7] = true;
+    actual[8]   = false;
+    expected[8] = false;
+
+    __builtin_clear_padding(&actual);
+    assert(actual[0] == true);
+    assert(actual[1] == false);
+    assert(actual[7] == true);
+    assert(actual[8] == false);
+    assert(memcmp(&actual, &expected, sizeof(Bool9Vec)) == 0);
+  }
+
+  // long double vector
+  {
+    LongDouble3Vec actual, expected;
+    memset(&actual, 42, sizeof(actual));
+    memset(&expected, 0, sizeof(expected));
+
+    actual[0]   = 1.0L;
+    expected[0] = 1.0L;
+    actual[1]   = 2.0L;
+    expected[1] = 2.0L;
+    actual[2]   = 3.0L;
+    expected[2] = 3.0L;
+
+    __builtin_clear_padding(&actual);
+    assert(actual[0] == 1.0L);
+    assert(actual[1] == 2.0L);
+    assert(actual[2] == 3.0L);
+    assert(memcmp(&actual, &expected, sizeof(LongDouble3Vec)) == 0);
+  }
+
+  // float vector
+  {
+    Float3Vec actual, expected;
+    memset(&actual, 42, sizeof(actual));
+    memset(&expected, 0, sizeof(expected));
+
+    actual[0]   = 1.0f;
+    expected[0] = 1.0f;
+    actual[1]   = 2.0f;
+    expected[1] = 2.0f;
+    actual[2]   = 3.0f;
+    expected[2] = 3.0f;
+
+    __builtin_clear_padding(&actual);
+    assert(actual[0] == 1.0f);
+    assert(actual[1] == 2.0f);
+    assert(actual[2] == 3.0f);
+    assert(memcmp(&actual, &expected, sizeof(Float3Vec)) == 0);
+  }
+}
+
+int main(int, const char**) {
+  // Run the suites in a fixed order; each reports failure through assert().
+  using Suite = void (*)();
+  const Suite suites[] = {primitiveTests, unionTests, structTests, arrayTests, vectorTests};
+  for (Suite run : suites)
+    run();
+
+  return 0;
+}


        


More information about the libcxx-commits mailing list