[clang] [llvm] Explicit types in cbuffer layouts (PR #156919)

Justin Bogner via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 16 11:46:45 PDT 2025


https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/156919

>From c4b14c7bd10d569f5811e7c8b45a82cf26f8ee4e Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 9 Sep 2025 14:08:37 -0700
Subject: [PATCH 1/4] [DirectX] Fix crash when naming buffers of arrays

DXILResource was falling over trying to name a resource type that
contained an array, such as `StructuredBuffer<float[3][2]>`. Handle this
by walking through array types to gather the dimensions.
---
 llvm/lib/Analysis/DXILResource.cpp                     | 8 ++++++++
 llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll | 9 ++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index f9bf09262dd1f..6f19a68dcd194 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -255,6 +255,12 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name,
   if (!ContainedType)
     return;
 
+  SmallVector<uint64_t> ArrayDimensions;
+  while (ArrayType *AT = dyn_cast<ArrayType>(ContainedType)) {
+    ArrayDimensions.push_back(AT->getNumElements());
+    ContainedType = AT->getElementType();
+  }
+
   StringRef ElementName;
   ElementType ET = toDXILElementType(ContainedType, IsSigned);
   if (ET != ElementType::Invalid) {
@@ -271,6 +277,8 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name,
   DestStream << "<" << ElementName;
   if (const FixedVectorType *VTy = dyn_cast<FixedVectorType>(ContainedType))
     DestStream << VTy->getNumElements();
+  for (uint64_t Dim : ArrayDimensions)
+    DestStream << "[" << Dim << "]";
   DestStream << ">";
 }
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
index 4f13f4789cd66..56798c8382d45 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
@@ -28,6 +28,11 @@ define void @test() {
       @llvm.dx.resource.handlefrombinding(i32 0, i32 10, i32 1, i32 0, ptr @SB.str)
   ; CHECK: %"StructuredBuffer<struct.S>" = type { %struct.S }
 
+  ; StructuredBuffer<float[3][2]>
+  %struct1 = call target("dx.RawBuffer", [3 x [2 x float]], 0, 0)
+      @llvm.dx.resource.handlefrombinding(i32 0, i32 12, i32 1, i32 0, ptr null)
+  ; CHECK: %"StructuredBuffer<float[3][2]>" = type { [3 x [2 x float]] }
+
   ; ByteAddressBuffer
   %byteaddr = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 20, i32 1, i32 0, ptr null)
@@ -40,12 +45,14 @@ define void @test() {
 ; CHECK-NEXT: @[[T1:.*]] = external constant %"Buffer<int32_t>"
 ; CHECK-NEXT: @[[T2:.*]] = external constant %"Buffer<uint32_t3>"
 ; CHECK-NEXT: @[[S0:.*]] = external constant %"StructuredBuffer<struct.S>"
+; CHECK-NEXT: @[[S1:.*]] = external constant %"StructuredBuffer<float[3][2]>"
 ; CHECK-NEXT: @[[B0:.*]] = external constant %ByteAddressBuffer
 
 ; CHECK: !{i32 0, ptr @[[T0]], !"A"
 ; CHECK: !{i32 1, ptr @[[T1]], !""
 ; CHECK: !{i32 2, ptr @[[T2]], !""
 ; CHECK: !{i32 3, ptr @[[S0]], !"SB"
-; CHECK: !{i32 4, ptr @[[B0]], !""
+; CHECK: !{i32 4, ptr @[[S1]], !""
+; CHECK: !{i32 5, ptr @[[B0]], !""
 
 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }

>From fecafc45a73556eac9b7731963fa288949912b89 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 11 Jun 2025 16:21:41 -0400
Subject: [PATCH 2/4] wip: Update DXILCBufferAccess to padding approach

---
 clang/lib/CodeGen/TargetInfo.h                |  14 ++
 clang/lib/CodeGen/Targets/DirectX.cpp         |  13 ++
 clang/lib/CodeGen/Targets/SPIR.cpp            |  14 ++
 llvm/include/llvm/Frontend/HLSL/CBuffer.h     |   6 +-
 llvm/lib/Frontend/HLSL/CBuffer.cpp            |  35 ++-
 llvm/lib/IR/Type.cpp                          |   4 +
 llvm/lib/Target/DirectX/DXILCBufferAccess.cpp |  93 +-------
 .../DirectX/CBufferAccess/array-typedgep.ll   |  16 +-
 .../CodeGen/DirectX/CBufferAccess/arrays.ll   |  97 ++++----
 .../CodeGen/DirectX/CBufferAccess/float.ll    |   6 +-
 .../DirectX/CBufferAccess/gep-ce-two-uses.ll  |  18 +-
 .../CodeGen/DirectX/CBufferAccess/memcpy.ll   | 216 ------------------
 .../CodeGen/DirectX/CBufferAccess/scalars.ll  |  34 +--
 .../CodeGen/DirectX/CBufferAccess/vectors.ll  |  36 +--
 14 files changed, 179 insertions(+), 423 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll

diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index d0edae1295094..8b59fde4b4120 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -448,6 +448,20 @@ class TargetCodeGenInfo {
     return nullptr;
   }
 
+  virtual llvm::Type *
+  getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const {
+    return llvm::ArrayType::get(llvm::Type::getInt8Ty(CGM.getLLVMContext()),
+                                NumBytes.getQuantity());
+  }
+
+  virtual bool isHLSLPadding(llvm::Type *Ty) const {
+    // TODO: Do we actually want to default these functions like this?
+    if (auto *AT = dyn_cast<llvm::ArrayType>(Ty))
+      if (AT->getElementType() == llvm::Type::getInt8Ty(Ty->getContext()))
+        return true;
+    return false;
+  }
+
   // Set the Branch Protection Attributes of the Function accordingly to the
   // BPI. Remove attributes that contradict with current BPI.
   static void
diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp
index b4cebb9a32aca..d44e531dacdff 100644
--- a/clang/lib/CodeGen/Targets/DirectX.cpp
+++ b/clang/lib/CodeGen/Targets/DirectX.cpp
@@ -32,6 +32,19 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo {
   llvm::Type *
   getHLSLType(CodeGenModule &CGM, const Type *T,
               const SmallVector<int32_t> *Packoffsets = nullptr) const override;
+
+  llvm::Type *getHLSLPadding(CodeGenModule &CGM,
+                             CharUnits NumBytes) const override {
+    unsigned Size = NumBytes.getQuantity();
+    return llvm::TargetExtType::get(CGM.getLLVMContext(), "dx.Padding", {},
+                                    {Size});
+  }
+
+  bool isHLSLPadding(llvm::Type *Ty) const override {
+    if (auto *TET = dyn_cast<llvm::TargetExtType>(Ty))
+      return TET->getName() == "dx.Padding";
+    return false;
+  }
 };
 
 llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 3f6d4e0a9277a..abad3c78ea1a2 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -56,6 +56,20 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
   llvm::Type *
   getHLSLType(CodeGenModule &CGM, const Type *Ty,
               const SmallVector<int32_t> *Packoffsets = nullptr) const override;
+
+  llvm::Type *
+  getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const override {
+    unsigned Size = NumBytes.getQuantity();
+    return llvm::TargetExtType::get(CGM.getLLVMContext(), "spirv.Padding", {},
+                                    {Size});
+  }
+
+  bool isHLSLPadding(llvm::Type *Ty) const override {
+    if (auto *TET = dyn_cast<llvm::TargetExtType>(Ty))
+      return TET->getName() == "spirv.Padding";
+    return false;
+  }
+
   llvm::Type *getSPIRVImageTypeFromHLSLResource(
       const HLSLAttributedResourceType::Attributes &attributes,
       QualType SampledType, CodeGenModule &CGM) const;
diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
index 694a7fa854576..f4e232ffe1745 100644
--- a/llvm/include/llvm/Frontend/HLSL/CBuffer.h
+++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
@@ -46,7 +46,8 @@ class CBufferMetadata {
   CBufferMetadata(NamedMDNode *MD) : MD(MD) {}
 
 public:
-  static std::optional<CBufferMetadata> get(Module &M);
+  static std::optional<CBufferMetadata>
+  get(Module &M, llvm::function_ref<bool(Type *)> IsPadding);
 
   using iterator = SmallVector<CBufferMapping>::iterator;
   iterator begin() { return Mappings.begin(); }
@@ -55,9 +56,6 @@ class CBufferMetadata {
   void eraseFromModule();
 };
 
-APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
-                               ArrayType *Ty);
-
 } // namespace hlsl
 } // namespace llvm
 
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index 407b6ad6d5a7e..329357ac8f08e 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -15,25 +15,28 @@
 using namespace llvm;
 using namespace llvm::hlsl;
 
-static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) {
+static SmallVector<size_t>
+getMemberOffsets(const DataLayout &DL, GlobalVariable *Handle,
+                 llvm::function_ref<bool(Type *)> IsPadding) {
+  SmallVector<size_t> Offsets;
+
   auto *HandleTy = cast<TargetExtType>(Handle->getValueType());
   assert((HandleTy->getName().ends_with(".CBuffer") ||
           HandleTy->getName() == "spirv.VulkanBuffer") &&
          "Not a cbuffer type");
   assert(HandleTy->getNumTypeParameters() == 1 && "Expected layout type");
+  auto *LayoutTy = cast<StructType>(HandleTy->getTypeParameter(0));
 
-  auto *LayoutTy = cast<TargetExtType>(HandleTy->getTypeParameter(0));
-  assert(LayoutTy->getName().ends_with(".Layout") && "Not a layout type");
-
-  // Skip the "size" parameter.
-  size_t ParamIndex = Index + 1;
-  assert(LayoutTy->getNumIntParameters() > ParamIndex &&
-         "Not enough parameters");
+  const StructLayout *SL = DL.getStructLayout(LayoutTy);
+  for (int I = 0, E = LayoutTy->getNumElements(); I < E; ++I)
+    if (!IsPadding(LayoutTy->getElementType(I)))
+      Offsets.push_back(SL->getElementOffset(I));
 
-  return LayoutTy->getIntParameter(ParamIndex);
+  return Offsets;
 }
 
-std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
+std::optional<CBufferMetadata>
+CBufferMetadata::get(Module &M, llvm::function_ref<bool(Type *)> IsPadding) {
   NamedMDNode *CBufMD = M.getNamedMetadata("hlsl.cbs");
   if (!CBufMD)
     return std::nullopt;
@@ -47,13 +50,16 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
         cast<ValueAsMetadata>(MD->getOperand(0))->getValue());
     CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle);
 
+    SmallVector<size_t> MemberOffsets =
+        getMemberOffsets(M.getDataLayout(), Handle, IsPadding);
+
     for (int I = 1, E = MD->getNumOperands(); I < E; ++I) {
       Metadata *OpMD = MD->getOperand(I);
       // Some members may be null if they've been optimized out.
       if (!OpMD)
         continue;
       auto *V = cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
-      Mapping.Members.emplace_back(V, getMemberOffset(Handle, I - 1));
+      Mapping.Members.emplace_back(V, MemberOffsets[I - 1]);
     }
   }
 
@@ -64,10 +70,3 @@ void CBufferMetadata::eraseFromModule() {
   // Remove the cbs named metadata
   MD->eraseFromParent();
 }
-
-APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
-                                     ArrayType *Ty) {
-  int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8;
-  int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes));
-  return Offset.udiv(TypeSize) * RoundUp;
-}
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 0e9535d24a4cc..b060a6c4f3bf6 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1008,6 +1008,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
   }
   if (Name == "spirv.IntegralConstant" || Name == "spirv.Literal")
     return TargetTypeInfo(Type::getVoidTy(C));
+  if (Name == "spirv.Padding")
+    return TargetTypeInfo(
+        ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)),
+        TargetExtType::CanBeGlobal);
   if (Name.starts_with("spirv."))
     return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit,
                           TargetExtType::CanBeGlobal,
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index 44277971acd60..e6ba437ce417e 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -8,6 +8,7 @@
 
 #include "DXILCBufferAccess.h"
 #include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
 #include "llvm/Frontend/HLSL/CBuffer.h"
 #include "llvm/Frontend/HLSL/HLSLResource.h"
 #include "llvm/IR/IRBuilder.h"
@@ -97,10 +98,6 @@ struct CBufferResource {
       (void)Success;
       assert(Success && "Offsets into cbuffer globals must be constant");
 
-      if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
-        ConstantOffset =
-            hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
-
       return ConstantOffset.getZExtValue();
     }
 
@@ -194,88 +191,14 @@ static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
   DeadInsts.push_back(LI);
 }
 
-/// This function recursively copies N array elements from the cbuffer resource
-/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
-/// arrays into a sequence of scalar/vector extracts and stores.
-static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI,
-                                    CBufferResource &CBR, ArrayType *ArrTy,
-                                    size_t ArrOffset, size_t N,
-                                    const Twine &Name = "") {
-  const DataLayout &DL = MCI->getDataLayout();
-  Type *ElemTy = ArrTy->getElementType();
-  size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
-  for (unsigned I = 0; I < N; ++I) {
-    size_t Offset = ArrOffset + I * ElemTySize;
-
-    // Recursively copy nested arrays
-    if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
-      copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset,
-                              ElemArrTy->getNumElements(), Name);
-      continue;
-    }
-
-    // Load CBuffer value and store it in Dest
-    APInt CBufArrayOffset(
-        DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
-    CBufArrayOffset =
-        hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
-    Value *CBufferVal =
-        CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
-    Value *GEP =
-        Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
-                                  {Builder.getInt32(Offset)}, Name + ".dest");
-    Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
-  }
-}
-
-/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
-/// itself. Assumes the cbuffer global is an array, and the length of bytes to
-/// copy is divisible by array element allocation size.
-/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
-static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) {
-
-  ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
-  assert(ArrTy && "MemCpy lowering is only supported for array types");
-
-  // This assumption vastly simplifies the implementation
-  if (MCI->getSource() != CBR.Member)
-    reportFatalUsageError(
-        "Expected MemCpy source to be a cbuffer global variable");
-
-  ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
-  uint64_t ByteLength = Length->getZExtValue();
-
-  // If length to copy is zero, no memcpy is needed
-  if (ByteLength == 0) {
-    MCI->eraseFromParent();
-    return;
-  }
-
-  const DataLayout &DL = CBR.getDataLayout();
-
-  Type *ElemTy = ArrTy->getElementType();
-  size_t ElemSize = DL.getTypeAllocSize(ElemTy);
-  assert(ByteLength % ElemSize == 0 &&
-         "Length of bytes to MemCpy must be divisible by allocation size of "
-         "source/destination array elements");
-  size_t ElemsToCpy = ByteLength / ElemSize;
-
-  IRBuilder<> Builder(MCI);
-  CBR.createAndSetCurrentHandle(Builder);
-
-  copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy,
-                          "memcpy." + MCI->getDest()->getName() + "." +
-                              MCI->getSource()->getName());
-
-  MCI->eraseFromParent();
-}
-
 static void replaceAccessesWithHandle(CBufferResource &CBR) {
   SmallVector<WeakTrackingVH> DeadInsts;
 
   SmallVector<User *> ToProcess{CBR.users()};
   while (!ToProcess.empty()) {
     User *Cur = ToProcess.pop_back_val();
+    assert(!isa<MemCpyInst>(Cur) &&
+           "memcpy should have been removed in an earlier pass");
 
     // If we have a load instruction, replace the access.
     if (auto *LI = dyn_cast<LoadInst>(Cur)) {
@@ -283,13 +206,6 @@ static void replaceAccessesWithHandle(CBufferResource &CBR) {
       continue;
     }
 
-    // If we have a memcpy instruction, replace it with multiple accesses and
-    // subsequent stores to the destination
-    if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
-      replaceMemCpy(MCI, CBR);
-      continue;
-    }
-
     // Otherwise, walk users looking for a load...
     if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
       ToProcess.append(Cur->user_begin(), Cur->user_end());
@@ -302,7 +218,8 @@ static void replaceAccessesWithHandle(CBufferResource &CBR) {
 }
 
 static bool replaceCBufferAccesses(Module &M) {
-  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(
+      M, [](Type *Ty) { return isa<llvm::dxil::PaddingExtType>(Ty); });
   if (!CBufMD)
     return false;
 
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
index 52ad0f3df1aba..10ea19df1d177 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
@@ -3,24 +3,24 @@
 ; cbuffer CB : register(b0) {
 ;   float a1[3];
 ; }
-%__cblayout_CB = type <{ [3 x float] }>
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
 
- at CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+ at CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external addrspace(2) global [3 x float], align 4
+ at a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4
 
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4
+  %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds (<{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) @a1, i32 0, i32 0, i32 1), align 4
   store float %a1, ptr %dst, align 32
 
   ret void
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
index db4e14c1336a6..714df11a3a1fa 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
@@ -1,47 +1,60 @@
 ; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
 
+; TODO: Remove datalayout.
+; This hack forces dxil-compatible alignment of 3-element 32- and 64-bit vectors
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v96:32:32-v192:64:64"
+
 ; cbuffer CB : register(b0) {
-;   float a1[3];
-;   double3 a2[2];
-;   float16_t a3[2][2];
-;   uint64_t a4[3];
-;   int4 a5[2][3][4];
-;   uint16_t a6[1];
-;   int64_t a7[2];
-;   bool a8[4];
+;   float a1[3];        // offset   0,  size 4  (+12) * 3
+;   double3 a2[2];      // offset   48, size 24  (+8) * 2
+;   float16_t a3[2][2]; // offset  112, size  2 (+14) * 4
+;   uint64_t a4[3];     // offset  176, size  8  (+8) * 3
+;   int4 a5[2][3][4];   // offset  224, size 16       * 24
+;   uint16_t a6[1];     // offset  608, size  2 (+14) * 1
+;   int64_t a7[2];      // offset  624, size  8  (+8) * 2
+;   bool a8[4];         // offset  656, size  4 (+12) * 4
 ; }
-%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
+%__cblayout_CB = type <{
+  <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+  <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+  <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14),
+  <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+  [24 x <4 x i32>],
+  [1 x i16], target("dx.Padding", 14),
+  <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+  <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+}>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
- at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
- at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
- at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
- at a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
+ at a1 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
+ at a2 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32
+ at a3 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, align 2
+ at a4 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+ at a5 = external local_unnamed_addr addrspace(2) global [24 x <4 x i32>], align 16
 @a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
- at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
- at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
+ at a7 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+ at a8 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4
 
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
   store float %a1, ptr %dst, align 32
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
   ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
   ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
   ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
@@ -52,26 +65,26 @@ entry:
   %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
   store <3 x double> %a2, ptr %a2.i, align 32
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
   ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32
   ; CHECK: store half [[X]], ptr [[PTR]]
-  %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2
+  %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 16), align 2
   %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32
   store half %a3, ptr %a3.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12)
   ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
   ; CHECK: store i64 [[X]], ptr [[PTR]]
-  %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8
+  %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 16), align 8
   %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
   store i64 %a4, ptr %a4.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26)
   ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
@@ -82,12 +95,12 @@ entry:
   ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
   ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
-  %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4
+  %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 192), align 4
   %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
   store <4 x i32> %a5, ptr %a5.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 38)
   ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64
   ; CHECK: store i16 [[X]], ptr [[PTR]]
@@ -95,21 +108,21 @@ entry:
   %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64
   store i16 %a6, ptr %a6.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 40)
   ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
   ; CHECK: store i64 [[X]], ptr [[PTR]]
-  %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8
+  %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 16), align 8
   %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
   store i64 %a7, ptr %a7.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 42)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 42)
   ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80
   ; CHECK: store i32 [[X]], ptr [[PTR]]
-  %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2
+  %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 16), align 4, !range !1, !noundef !2
   %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80
   store i32 %a8, ptr %a8.i, align 4
 
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
index d7272b449166d..6f45fb6ae2e24 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
@@ -2,7 +2,7 @@
 
 %__cblayout_CB = type <{ float }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
 @x = external local_unnamed_addr addrspace(2) global float, align 4
@@ -10,8 +10,8 @@
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
   %x = load float, ptr addrspace(2) @x, align 4
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
index abe087dbe6100..21724d32f10d0 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
@@ -3,28 +3,28 @@
 ; cbuffer CB : register(b0) {
 ;   float a1[3];
 ; }
-%__cblayout_CB = type <{ [3 x float] }>
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+ at a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4
 
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
   store float %a1, ptr %dst, align 32
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
   store float %a2, ptr %dst, align 32
 
   ret void
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
deleted file mode 100644
index f1486f974fb36..0000000000000
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
+++ /dev/null
@@ -1,216 +0,0 @@
-; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
-
-; cbuffer CB : register(b0) {
-;   float a1[3];
-;   double3 a2[2];
-;   float16_t a3[2][2];
-;   uint64_t a4[3];
-;   int2 a5[3][2];
-;   uint16_t a6[1];
-;   int64_t a7[2];
-;   bool a8[4];
-; }
-%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }>
-
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
- at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
- at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
- at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
- at a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16
- at a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
- at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
- at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
-
-; CHECK: define void @f(
-define void @f(ptr %dst) {
-entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4
-
-  %a1.copy = alloca [3 x float], align 4
-  %a2.copy = alloca [2 x <3 x double>], align 32
-  %a3.copy = alloca [2 x [2 x half]], align 2
-  %a4.copy = alloca [3 x i64], align 8
-  %a5.copy = alloca [3 x [2 x <2 x i32>]], align 16
-  %a6.copy = alloca [1 x i16], align 2
-  %a7.copy = alloca [2 x i64], align 8
-  %a8.copy = alloca [4 x i32], align 4
-
-  ; Try copying no elements
-; CHECK-NOT: memcpy
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 0, i1 false)
-
-  ; Try copying only the first element
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
-; CHECK:    [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
-; CHECK:    store float [[X]], ptr [[DEST]], align 4
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 4, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
-; CHECK:    [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
-; CHECK:    store float [[X]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
-; CHECK:    [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4
-; CHECK:    store float [[Y]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
-; CHECK:    [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8
-; CHECK:    store float [[Z]], ptr [[DEST]], align 4
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
-; CHECK:    [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
-; CHECK:    [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
-; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
-; CHECK:    [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
-; CHECK:    [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
-; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
-; CHECK:    [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0
-; CHECK:    store half [[X]], ptr [[DEST]], align 2
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
-; CHECK:    [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2
-; CHECK:    store half [[Y]], ptr [[DEST]], align 2
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9)
-; CHECK:    [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4
-; CHECK:    store half [[X]], ptr [[DEST]], align 2
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10)
-; CHECK:    [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6
-; CHECK:    store half [[Y]], ptr [[DEST]], align 2
-  call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11)
-; CHECK:    [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0
-; CHECK:    store i64 [[X]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
-; CHECK:    [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8
-; CHECK:    store i64 [[Y]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13)
-; CHECK:    [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16
-; CHECK:    store i64 [[Z]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
-; CHECK:    [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0
-; CHECK:    store i16 [[X]], ptr [[DEST]], align 2
-  call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
-; CHECK:    [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0
-; CHECK:    store i64 [[X]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
-; CHECK:    [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8
-; CHECK:    store i64 [[Y]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0
-; CHECK:    store i32 [[X]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21)
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4
-; CHECK:    store i32 [[Y]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22)
-; CHECK:    [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8
-; CHECK:    store i32 [[Z]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23)
-; CHECK:    [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12
-; CHECK:    store i32 [[W]], ptr [[DEST]], align 4
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false)
-
-  ret void
-}
-
-declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg)
-
-; CHECK-NOT: !hlsl.cbs =
-!hlsl.cbs = !{!0}
-
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
-!1 = !{i32 0, i32 2}
-!2 = !{}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
index 7857c25d69636..e53ba1c273240 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
@@ -11,7 +11,7 @@
 ; }
 %__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global float, align 4
@@ -25,18 +25,18 @@
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[A1]], ptr %dst
   %a1 = load float, ptr addrspace(2) @a1, align 4
   store float %a1, ptr %dst, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
   ; CHECK: store i32 [[A2]], ptr [[PTR]]
@@ -44,8 +44,8 @@ entry:
   %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
   store i32 %a2, ptr %a2.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
   ; CHECK: store i32 [[A3]], ptr [[PTR]]
@@ -53,8 +53,8 @@ entry:
   %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
   store i32 %a3, ptr %a3.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12
   ; CHECK: store half [[A4]], ptr [[PTR]]
@@ -62,8 +62,8 @@ entry:
   %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12
   store half %a4, ptr %a4.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14
   ; CHECK: store i16 [[A5]], ptr [[PTR]]
@@ -71,8 +71,8 @@ entry:
   %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14
   store i16 %a5, ptr %a5.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
   ; CHECK: store double [[A6]], ptr [[PTR]]
@@ -80,8 +80,8 @@ entry:
   %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
   store double %a6, ptr %a6.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24
   ; CHECK: store i64 [[A7]], ptr [[PTR]]
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
index 4160008a986af..e99c77dbe2710 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
@@ -8,9 +8,9 @@
 ;   int4 a5;       // offset  80, size 16
 ;   uint16_t3 a6;  // offset  96, size  6 (+10)
 ; };
-%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }>
+%__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16> }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16
@@ -23,11 +23,11 @@
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2
@@ -38,11 +38,11 @@ entry:
   %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16
   store <3 x float> %a1, ptr %dst, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
   ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
   ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
@@ -53,8 +53,8 @@ entry:
   %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
   store <3 x double> %a2, ptr %a2.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
   ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4
   ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5
   ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0
@@ -65,11 +65,11 @@ entry:
   %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
   store <2 x half> %a3, ptr %a3.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3)
   ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 4)
   ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0
   ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1
@@ -80,8 +80,8 @@ entry:
   %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
   store <3 x i64> %a4, ptr %a4.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
   ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
@@ -96,8 +96,8 @@ entry:
   %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
   store <4 x i32> %a5, ptr %a5.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
   ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2

>From 7c7d47475e6a55a056bdafaae46883047741538a Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 11 Jun 2025 11:09:21 -0400
Subject: [PATCH 3/4] wip: Explicit structs in clang codegen

This abandons the `dx.Layout` idea and just uses explicit padding.

Note: Reordered fields break stuff, including ones from implicit bindings.
---
 clang/lib/CodeGen/CGExpr.cpp                  |  26 ++
 clang/lib/CodeGen/CGExprAgg.cpp               |   4 +
 clang/lib/CodeGen/CGHLSLRuntime.cpp           | 165 ++++++++++-
 clang/lib/CodeGen/CGHLSLRuntime.h             |  23 +-
 clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp | 270 +++++-------------
 clang/lib/CodeGen/HLSLBufferLayoutBuilder.h   |  47 +--
 clang/lib/CodeGen/Targets/DirectX.cpp         |   7 +-
 clang/lib/CodeGen/Targets/SPIR.cpp            |   7 +-
 clang/test/CodeGenHLSL/ArrayAssignable.hlsl   |  70 +++--
 .../GlobalConstructorFunction.hlsl            |   6 +-
 clang/test/CodeGenHLSL/resources/cbuffer.hlsl | 251 ++++++++++------
 .../resources/cbuffer_and_namespaces.hlsl     |  10 +-
 .../resources/cbuffer_with_packoffset.hlsl    |  13 +-
 ...uffer_with_static_global_and_function.hlsl |   2 +-
 .../resources/default_cbuffer.hlsl            |  11 +-
 .../default_cbuffer_with_layout.hlsl          |   3 +
 llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp  |   7 +-
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp |   3 -
 .../CodeGen/SPIRV/hlsl-resources/cbuffer.ll   |   8 +-
 .../SPIRV/hlsl-resources/cbuffer_unused.ll    |  18 +-
 20 files changed, 560 insertions(+), 391 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 8439ec7fb8eae..d577d2c24cd31 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4869,6 +4869,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
         emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
                               ScaledIdx, false, SignedIndices, E->getExprLoc());
     Addr = Address(EltPtr, OrigBaseElemTy, EltAlign);
+  } else if (getLangOpts().HLSL &&
+             E->getType().getAddressSpace() == LangAS::hlsl_constant) {
+    // This is an array inside of a cbuffer.
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
+
+    SmallVector<llvm::Value *, 2> Indices;
+    Indices.push_back(EmitIdxAfterBase(/*Promote*/true));
+
+    CharUnits ElementSize = getContext().getTypeSizeInChars(E->getType());
+    CharUnits RowAlignedSize = ElementSize.alignTo(CharUnits::fromQuantity(16));
+
+    llvm::Type *EltTyToIndex = Addr.getElementType();
+    if (RowAlignedSize > ElementSize) {
+      llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding(
+          CGM, RowAlignedSize - ElementSize);
+      EltTyToIndex = llvm::StructType::get(
+          getLLVMContext(), {EltTyToIndex, Padding}, /*isPacked=*/true);
+      Indices.push_back(llvm::ConstantInt::get(Indices[0]->getType(), 0));
+}
+
+    CharUnits EltAlign =
+      getArrayElementAlign(Addr.getAlignment(), Indices[0], RowAlignedSize);
+    llvm::Value *EltPtr =
+        emitArraySubscriptGEP(*this, EltTyToIndex, Addr.emitRawPointer(*this),
+                              Indices, false, SignedIndices, E->getExprLoc());
+    Addr = Address(EltPtr, Addr.getElementType(), EltAlign);
   } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
     // If this is A[i] where A is an array, the frontend will have decayed the
     // base to be a ArrayToPointerDecay implicit cast.  While correct, it is
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index eee397f1f3d19..bd7d30b10d4d0 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -2279,6 +2279,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
     }
   }
 
+  if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant)
+    if (CGM.getHLSLRuntime().emitBufferCopy(*this, DestPtr, SrcPtr, Ty))
+      return;
+
   // Aggregate assignment turns into llvm.memcpy.  This is almost valid per
   // C99 6.5.16.1p3, which states "If the value being stored in an object is
   // read from another object that overlaps in anyway the storage of the first
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index ecab9336a9f82..13a9534d800c0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -13,10 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGHLSLRuntime.h"
-#include "Address.h"
 #include "CGDebugInfo.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "HLSLBufferLayoutBuilder.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attrs.inc"
@@ -26,6 +26,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Frontend/HLSL/RootSignatureMetadata.h"
@@ -43,6 +44,8 @@
 #include <cstdint>
 #include <optional>
 
+#define DEBUG_TYPE "cghlslruntime"
+
 using namespace clang;
 using namespace CodeGen;
 using namespace clang::hlsl;
@@ -265,9 +268,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
   assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");
 
   // Check if the target has a specific translation for this type first.
-  if (llvm::Type *TargetTy =
+  if (llvm::Type *LayoutTy =
           CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
-    return TargetTy;
+    return LayoutTy;
 
   llvm_unreachable("Generic handling of HLSL types is not supported.");
 }
@@ -284,10 +287,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
 
   // get the layout struct from constant buffer target type
   llvm::Type *BufType = BufGV->getValueType();
-  llvm::Type *BufLayoutType =
-      cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
   llvm::StructType *LayoutStruct = cast<llvm::StructType>(
-      cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
+      cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));
 
   // Start metadata list associating the buffer global variable with its
   // constatns
@@ -326,6 +327,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
       continue;
     }
 
+    if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
+      ++ElemIt;
+
     assert(ElemIt != LayoutStruct->element_end() &&
            "number of elements in layout struct does not match");
     llvm::Type *LayoutType = *ElemIt++;
@@ -423,12 +427,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
   if (BufDecl->hasValidPackoffset())
     fillPackoffsetLayout(BufDecl, Layout);
 
-  llvm::TargetExtType *TargetTy =
-      cast<llvm::TargetExtType>(convertHLSLSpecificType(
-          ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
+  llvm::Type *LayoutTy = convertHLSLSpecificType(
+      ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr);
   llvm::GlobalVariable *BufGV = new GlobalVariable(
-      TargetTy, /*isConstant*/ false,
-      GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
+      LayoutTy, /*isConstant*/ false,
+      GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
       llvm::formatv("{0}{1}", BufDecl->getName(),
                     BufDecl->isCBuffer() ? ".cb" : ".tb"),
       GlobalValue::NotThreadLocal);
@@ -454,7 +457,7 @@ void CGHLSLRuntime::addRootSignature(
                      SignatureDecl->getRootElements(), nullptr, M);
 }
 
-llvm::TargetExtType *
+llvm::StructType *
 CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
   const auto Entry = LayoutTypes.find(StructType);
   if (Entry != LayoutTypes.end())
@@ -463,7 +466,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
 }
 
 void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
-                                            llvm::TargetExtType *LayoutTy) {
+                                            llvm::StructType *LayoutTy) {
   assert(getHLSLBufferLayoutType(StructType) == nullptr &&
          "layout type for this struct already exist");
   LayoutTypes[StructType] = LayoutTy;
@@ -997,3 +1000,139 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
   }
   return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl);
 }
+
+namespace {
+class HLSLBufferCopyEmitter {
+  CodeGenFunction &CGF;
+  Address DestPtr;
+  Address SrcPtr;
+  llvm::Type *LayoutTy = nullptr;
+
+  SmallVector<llvm::Value *> CurStoreIndices;
+  SmallVector<llvm::Value *> CurLoadIndices;
+
+  void emitCopyAtIndices(llvm::Type *FieldTy, unsigned StoreIndex,
+                         unsigned LoadIndex) {
+    CurStoreIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, StoreIndex));
+    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, LoadIndex));
+    auto RestoreIndices = llvm::make_scope_exit([&]() {
+      CurStoreIndices.pop_back();
+      CurLoadIndices.pop_back();
+    });
+
+    if (processArray(FieldTy))
+      return;
+    if (processBufferLayoutArray(FieldTy))
+      return;
+    if (processStruct(FieldTy))
+      return;
+
+    // We have a scalar or vector element - emit a copy.
+    CharUnits Align = CharUnits::fromQuantity(
+        CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
+    Address SrcGEP = RawAddress(
+        CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
+                                      CurLoadIndices, "cbuf.src"),
+        FieldTy, Align, SrcPtr.isKnownNonNull());
+    Address DestGEP = CGF.Builder.CreateInBoundsGEP(
+        DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
+    llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
+    CGF.Builder.CreateStore(Load, DestGEP);
+  }
+
+  bool processArray(llvm::Type *FieldTy) {
+    auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
+    if (!AT)
+      return false;
+
+    // If we have an array then there isn't any padding
+    // between elements. We just need to copy each element over.
+    for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
+      emitCopyAtIndices(AT->getElementType(), I, I);
+    return true;
+  }
+
+  bool processBufferLayoutArray(llvm::Type *FieldTy) {
+    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+    if (!ST || ST->getNumElements() != 2)
+      return false;
+
+    auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
+    if (!PaddedEltsTy)
+      return false;
+
+    auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
+    if (!PaddedTy || PaddedTy->getNumElements() != 2)
+      return false;
+
+    if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
+            PaddedTy->getElementType(1)))
+      return false;
+
+    llvm::Type *ElementTy = ST->getElementType(1);
+    if (PaddedTy->getElementType(0) != ElementTy)
+      return false;
+
+    // All but the last of the logical array elements are in the padded array.
+    unsigned NumElts = PaddedEltsTy->getNumElements() + 1;
+
+    // Add an extra indirection to the load for the struct and walk the
+    // array prefix.
+    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, 0));
+    for (unsigned I = 0; I < NumElts - 1; ++I) {
+      // We need to copy the element itself, without the padding.
+      CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I));
+      emitCopyAtIndices(ElementTy, I, 0);
+      CurLoadIndices.pop_back();
+    }
+    CurLoadIndices.pop_back();
+
+    // Now copy the last element.
+    emitCopyAtIndices(ElementTy, NumElts - 1, 1);
+
+    return true;
+  }
+
+  bool processStruct(llvm::Type *FieldTy) {
+    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+    if (!ST)
+      return false;
+
+    unsigned Skipped = 0;
+    for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
+      llvm::Type *ElementTy = ST->getElementType(I);
+      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy))
+        ++Skipped;
+      else
+        emitCopyAtIndices(ElementTy, I, I + Skipped);
+    }
+    return true;
+  }
+
+public:
+  HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
+      : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}
+
+  bool emitCopy(QualType CType) {
+    LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
+
+    LLVM_DEBUG({
+      dbgs() << "Emitting copy of ";
+      LayoutTy->print(dbgs());
+      dbgs() << "\n";
+    });
+
+    // If we don't have an aggregate, we can just fall back to normal memcpy.
+    if (!LayoutTy->isAggregateType())
+      return false;
+
+    emitCopyAtIndices(LayoutTy, 0, 0);
+    return true;
+  }
+};
+} // namespace
+
+bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr,
+                                   Address SrcPtr, QualType CType) {
+  return HLSLBufferCopyEmitter(CGF, DestPtr, SrcPtr).emitCopy(CType);
+}
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 7c6c2850fd4d4..29d21646f7448 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -15,20 +15,19 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
 
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsDirectX.h"
-#include "llvm/IR/IntrinsicsSPIRV.h"
-
+#include "Address.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/HLSLRuntime.h"
-
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Frontend/HLSL/HLSLResource.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
 
 #include <optional>
 #include <vector>
@@ -186,16 +185,18 @@ class CGHLSLRuntime {
 
   llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);
 
-  llvm::TargetExtType *
-  getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
+  llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
   void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
-                               llvm::TargetExtType *LayoutTy);
+                               llvm::StructType *LayoutTy);
   void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);
 
   std::optional<LValue>
   emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E,
                                  CodeGenFunction &CGF);
 
+  bool emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr,
+                      QualType CType);
+
 private:
   void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
                                     llvm::GlobalVariable *BufGV);
@@ -203,7 +204,7 @@ class CGHLSLRuntime {
                                    llvm::GlobalVariable *GV);
   llvm::Triple::ArchType getArch();
 
-  llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
+  llvm::DenseMap<const clang::RecordType *, llvm::StructType *> LayoutTypes;
 };
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
index 838903cdcd1ee..99b5602b18bff 100644
--- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
@@ -9,6 +9,7 @@
 #include "HLSLBufferLayoutBuilder.h"
 #include "CGHLSLRuntime.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/Type.h"
 #include <climits>
 
@@ -19,71 +20,21 @@
 
 using namespace clang;
 using namespace clang::CodeGen;
-using llvm::hlsl::CBufferRowSizeInBytes;
 
-namespace {
-
-// Creates a new array type with the same dimentions but with the new
-// element type.
-static llvm::Type *
-createArrayWithNewElementType(CodeGenModule &CGM,
-                              const ConstantArrayType *ArrayType,
-                              llvm::Type *NewElemType) {
-  const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual();
-  if (ArrayElemType->isConstantArrayType())
-    NewElemType = createArrayWithNewElementType(
-        CGM, cast<const ConstantArrayType>(ArrayElemType), NewElemType);
-  return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize());
-}
-
-// Returns the size of a scalar or vector in bytes
-static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) {
-  assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy());
-  if (Ty->isVectorTy()) {
-    llvm::FixedVectorType *FVT = cast<llvm::FixedVectorType>(Ty);
-    return FVT->getNumElements() *
-           (FVT->getElementType()->getScalarSizeInBits() / 8);
-  }
-  return Ty->getScalarSizeInBits() / 8;
-}
-
-} // namespace
+static const CharUnits CBufferRowSize =
+    CharUnits::fromQuantity(llvm::hlsl::CBufferRowSizeInBytes);
 
 namespace clang {
 namespace CodeGen {
 
-// Creates a layout type for given struct or class with HLSL constant buffer
-// layout taking into account PackOffsets, if provided.
-// Previously created layout types are cached by CGHLSLRuntime.
-//
-// The function iterates over all fields of the record type (including base
-// classes) and calls layoutField to converts each field to its corresponding
-// LLVM type and to calculate its HLSL constant buffer layout. Any embedded
-// structs (or arrays of structs) are converted to target layout types as well.
-//
-// When PackOffsets are specified the elements will be placed based on the
-// user-specified offsets. Not all elements must have a packoffset/register(c#)
-// annotation though. For those that don't, the PackOffsets array will contain
-// -1 value instead. These elements must be placed at the end of the layout
-// after all of the elements with specific offset.
-llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
+llvm::StructType *HLSLBufferLayoutBuilder::layOutStruct(
     const RecordType *RT, const llvm::SmallVector<int32_t> *PackOffsets) {
 
   // check if we already have the layout type for this struct
-  if (llvm::TargetExtType *Ty =
-          CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT))
+  // TODO: Do we need to check for matching PackOffsets?
+  if (llvm::StructType *Ty = CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT))
     return Ty;
 
-  SmallVector<unsigned> Layout;
-  SmallVector<llvm::Type *> LayoutElements;
-  unsigned Index = 0; // packoffset index
-  unsigned EndOffset = 0;
-
-  SmallVector<std::pair<const FieldDecl *, unsigned>> DelayLayoutFields;
-
-  // reserve first spot in the layout vector for buffer size
-  Layout.push_back(0);
-
   // iterate over all fields of the record, including fields on base classes
   llvm::SmallVector<CXXRecordDecl *> RecordDecls;
   RecordDecls.push_back(RT->castAsCXXRecordDecl());
@@ -94,179 +45,102 @@ llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
     RecordDecls.push_back(D->bases_begin()->getType()->castAsCXXRecordDecl());
   }
 
-  unsigned FieldOffset;
-  llvm::Type *FieldType;
+  SmallVector<llvm::Type *> Layout;
+  SmallVector<const FieldDecl *> DelayLayoutFields;
+  CharUnits CurrentOffset = CharUnits::Zero();
+  auto LayOutField = [&](QualType FieldType) {
+    llvm::Type *LayoutType = layOutType(FieldType);
+
+    const llvm::DataLayout &DL = CGM.getDataLayout();
+    CharUnits Size =
+        CharUnits::fromQuantity(DL.getTypeSizeInBits(LayoutType) / 8);
+    CharUnits Align = CharUnits::fromQuantity(DL.getABITypeAlign(LayoutType));
+
+    if (LayoutType->isAggregateType() ||
+        (CurrentOffset % CBufferRowSize) + Size > CBufferRowSize)
+      Align = Align.alignTo(CBufferRowSize);
+
+    CharUnits NextOffset = CurrentOffset.alignTo(Align);
+    if (NextOffset > CurrentOffset) {
+      llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding(
+          CGM, NextOffset - CurrentOffset);
+      Layout.emplace_back(Padding);
+      CurrentOffset = NextOffset;
+    }
+    Layout.emplace_back(LayoutType);
+    CurrentOffset += Size;
+  };
 
+  unsigned PackOffsetIndex = 0;
   while (!RecordDecls.empty()) {
     const CXXRecordDecl *RD = RecordDecls.pop_back_val();
 
     for (const auto *FD : RD->fields()) {
-      assert((!PackOffsets || Index < PackOffsets->size()) &&
+      assert((!PackOffsets || PackOffsetIndex < PackOffsets->size()) &&
              "number of elements in layout struct does not match number of "
              "packoffset annotations");
 
       // No PackOffset info at all, or have a valid packoffset/register(c#)
       // annotations value -> layout the field.
-      const int PO = PackOffsets ? (*PackOffsets)[Index++] : -1;
-      if (!PackOffsets || PO != -1) {
-        if (!layoutField(FD, EndOffset, FieldOffset, FieldType, PO))
-          return nullptr;
-        Layout.push_back(FieldOffset);
-        LayoutElements.push_back(FieldType);
+      const int PO = PackOffsets ? (*PackOffsets)[PackOffsetIndex++] : -1;
+      if (PO != -1) {
+        LayOutField(FD->getType());
         continue;
       }
       // Have PackOffset info, but there is no packoffset/register(cX)
       // annotation on this field. Delay the layout until after all of the
       // other elements with packoffsets/register(cX) are processed.
-      DelayLayoutFields.emplace_back(FD, LayoutElements.size());
-      // reserve space for this field in the layout vector and elements list
-      Layout.push_back(UINT_MAX);
-      LayoutElements.push_back(nullptr);
+      DelayLayoutFields.emplace_back(FD);
     }
   }
 
   // process delayed layouts
-  for (auto I : DelayLayoutFields) {
-    const FieldDecl *FD = I.first;
-    const unsigned IndexInLayoutElements = I.second;
-    // the first item in layout vector is size, so we need to offset the index
-    // by 1
-    const unsigned IndexInLayout = IndexInLayoutElements + 1;
-    assert(Layout[IndexInLayout] == UINT_MAX &&
-           LayoutElements[IndexInLayoutElements] == nullptr);
-
-    if (!layoutField(FD, EndOffset, FieldOffset, FieldType))
-      return nullptr;
-    Layout[IndexInLayout] = FieldOffset;
-    LayoutElements[IndexInLayoutElements] = FieldType;
-  }
+  for (const FieldDecl *FD : DelayLayoutFields)
+    LayOutField(FD->getType());
 
-  // set the size of the buffer
-  Layout[0] = EndOffset;
-
-  // create the layout struct type; anonymous struct have empty name but
+  // Create the layout struct type; anonymous structs have empty name but
   // non-empty qualified name
   const auto *Decl = RT->castAsCXXRecordDecl();
   std::string Name =
       Decl->getName().empty() ? "anon" : Decl->getQualifiedNameAsString();
-  llvm::StructType *StructTy =
-      llvm::StructType::create(LayoutElements, Name, true);
 
-  // create target layout type
-  llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get(
-      CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout);
-  if (NewLayoutTy)
-    CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewLayoutTy);
-  return NewLayoutTy;
+  llvm::StructType *NewTy = llvm::StructType::create(Layout, Name,
+                                                     /*isPacked=*/true);
+  CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewTy);
+  return NewTy;
 }
 
-// The function converts a single field of HLSL Buffer to its corresponding
-// LLVM type and calculates it's layout. Any embedded structs (or
-// arrays of structs) are converted to target layout types as well.
-// The converted type is set to the FieldType parameter, the element
-// offset is set to the FieldOffset parameter. The EndOffset (=size of the
-// buffer) is also updated accordingly to the offset just after the placed
-// element, unless the incoming EndOffset already larger (may happen in case
-// of unsorted packoffset annotations).
-// Returns true if the conversion was successful.
-// The packoffset parameter contains the field's layout offset provided by the
-// user or -1 if there was no packoffset (or register(cX)) annotation.
-bool HLSLBufferLayoutBuilder::layoutField(const FieldDecl *FD,
-                                          unsigned &EndOffset,
-                                          unsigned &FieldOffset,
-                                          llvm::Type *&FieldType,
-                                          int Packoffset) {
-
-  // Size of element; for arrays this is a size of a single element in the
-  // array. Total array size of calculated as (ArrayCount-1) * ArrayStride +
-  // ElemSize.
-  unsigned ElemSize = 0;
-  unsigned ElemOffset = 0;
-  unsigned ArrayCount = 1;
-  unsigned ArrayStride = 0;
-
-  unsigned NextRowOffset = llvm::alignTo(EndOffset, CBufferRowSizeInBytes);
-
-  llvm::Type *ElemLayoutTy = nullptr;
-  QualType FieldTy = FD->getType();
-
-  if (FieldTy->isConstantArrayType()) {
-    // Unwrap array to find the element type and get combined array size.
-    QualType Ty = FieldTy;
-    while (Ty->isConstantArrayType()) {
-      auto *ArrayTy = CGM.getContext().getAsConstantArrayType(Ty);
-      ArrayCount *= ArrayTy->getSExtSize();
-      Ty = ArrayTy->getElementType();
-    }
-    // For array of structures, create a new array with a layout type
-    // instead of the structure type.
-    if (Ty->isStructureOrClassType()) {
-      llvm::Type *NewTy = cast<llvm::TargetExtType>(
-          createLayoutType(Ty->getAsCanonical<RecordType>()));
-      if (!NewTy)
-        return false;
-      assert(isa<llvm::TargetExtType>(NewTy) && "expected target type");
-      ElemSize = cast<llvm::TargetExtType>(NewTy)->getIntParameter(0);
-      ElemLayoutTy = createArrayWithNewElementType(
-          CGM, cast<ConstantArrayType>(FieldTy.getTypePtr()), NewTy);
-    } else {
-      // Array of vectors or scalars
-      ElemSize =
-          getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty));
-      ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
-    }
-    ArrayStride = llvm::alignTo(ElemSize, CBufferRowSizeInBytes);
-    ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
-
-  } else if (FieldTy->isStructureOrClassType()) {
-    // Create a layout type for the structure
-    ElemLayoutTy = createLayoutType(
-        cast<RecordType>(FieldTy->getAsCanonical<RecordType>()));
-    if (!ElemLayoutTy)
-      return false;
-    assert(isa<llvm::TargetExtType>(ElemLayoutTy) && "expected target type");
-    ElemSize = cast<llvm::TargetExtType>(ElemLayoutTy)->getIntParameter(0);
-    ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
-
-  } else {
-    // scalar or vector - find element size and alignment
-    unsigned Align = 0;
-    ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
-    if (ElemLayoutTy->isVectorTy()) {
-      // align vectors by sub element size
-      const llvm::FixedVectorType *FVT =
-          cast<llvm::FixedVectorType>(ElemLayoutTy);
-      unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8;
-      ElemSize = FVT->getNumElements() * SubElemSize;
-      Align = SubElemSize;
-    } else {
-      assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy());
-      ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8;
-      Align = ElemSize;
-    }
+llvm::Type *HLSLBufferLayoutBuilder::layOutArray(const ConstantArrayType *AT) {
+  llvm::Type *EltTy = layOutType(AT->getElementType());
+  uint64_t Count = AT->getZExtSize();
+
+  CharUnits EltSize =
+      CharUnits::fromQuantity(CGM.getDataLayout().getTypeSizeInBits(EltTy) / 8);
+  CharUnits Padding = EltSize.alignTo(CBufferRowSize) - EltSize;
+
+  // If we don't have any padding between elements then we just need the array
+  // itself.
+  if (Count < 2 || Padding.isZero())
+    return llvm::ArrayType::get(EltTy, Count);
+
+  llvm::LLVMContext &Context = CGM.getLLVMContext();
+  llvm::Type *PaddingTy =
+      CGM.getTargetCodeGenInfo().getHLSLPadding(CGM, Padding);
+  auto *PaddedEltTy =
+      llvm::StructType::get(Context, {EltTy, PaddingTy}, /*isPacked=*/true);
+  return llvm::StructType::get(
+      Context, {llvm::ArrayType::get(PaddedEltTy, Count - 1), EltTy},
+      /*IsPacked=*/true);
+}
 
-    // calculate or get element offset for the vector or scalar
-    if (Packoffset != -1) {
-      ElemOffset = Packoffset;
-    } else {
-      ElemOffset = llvm::alignTo(EndOffset, Align);
-      // if the element does not fit, move it to the next row
-      if (ElemOffset + ElemSize > NextRowOffset)
-        ElemOffset = NextRowOffset;
-    }
-  }
+llvm::Type *HLSLBufferLayoutBuilder::layOutType(QualType Ty) {
+  if (const auto *AT = CGM.getContext().getAsConstantArrayType(Ty))
+    return layOutArray(AT);
 
-  // Update end offset of the layout; do not update it if the EndOffset
-  // is already bigger than the new value (which may happen with unordered
-  // packoffset annotations)
-  unsigned NewEndOffset =
-      ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize;
-  EndOffset = std::max<unsigned>(EndOffset, NewEndOffset);
+  if (Ty->isStructureOrClassType())
+    return layOutStruct(Ty->getAsCanonical<RecordType>());
 
-  // add the layout element and offset to the lists
-  FieldOffset = ElemOffset;
-  FieldType = ElemLayoutTy;
-  return true;
+  return CGM.getTypes().ConvertTypeForMem(Ty);
 }
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
index 61240b280cfcb..0515b469f8b03 100644
--- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
@@ -6,13 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/TypeBase.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DerivedTypes.h"
 
 namespace clang {
-class RecordType;
-class FieldDecl;
-
 namespace CodeGen {
 class CodeGenModule;
 
@@ -24,23 +22,36 @@ class CodeGenModule;
 class HLSLBufferLayoutBuilder {
 private:
   CodeGenModule &CGM;
-  llvm::StringRef LayoutTypeName;
 
 public:
-  HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName)
-      : CGM(CGM), LayoutTypeName(LayoutTypeName) {}
-
-  // Returns LLVM target extension type with the name LayoutTypeName
-  // for given structure type and layout data. The first number in
-  // the Layout is the size followed by offsets for each struct element.
-  llvm::TargetExtType *
-  createLayoutType(const RecordType *StructType,
-                   const llvm::SmallVector<int32_t> *Packoffsets = nullptr);
-
-private:
-  bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset,
-                   unsigned &FieldOffset, llvm::Type *&FieldType,
-                   int Packoffset = -1);
+  HLSLBufferLayoutBuilder(CodeGenModule &CGM) : CGM(CGM) {}
+
+  /// Lays out a struct type following HLSL buffer rules and considering
+  /// PackOffsets, if provided. Previously created layout structs are cached by
+  /// CGHLSLRuntime.
+  ///
+  /// The function iterates over all fields of the record type (including base
+  /// classes) and calls layoutField to converts each field to its corresponding
+  /// LLVM type and to calculate its HLSL constant buffer layout. Any embedded
+  /// structs (or arrays of structs) are converted to layout types as well.
+  ///
+  /// When PackOffsets are specified the elements will be placed based on the
+  /// user-specified offsets. Not all elements must have a
+  /// packoffset/register(c#) annotation though. For those that don't, the
+  /// PackOffsets array will contain -1 value instead. These elements must be
+  /// placed at the end of the layout after all of the elements with specific
+  /// offset.
+  llvm::StructType *
+  layOutStruct(const RecordType *StructType,
+               const llvm::SmallVector<int32_t> *Packoffsets = nullptr);
+
+  /// Lays out an array type following HLSL buffer rules.
+  llvm::Type *
+  layOutArray(const ConstantArrayType *AT);
+
+  /// Lays out a type following HLSL buffer rules. Arrays and structures will be
+  /// padded appropriately and nested objects will be converted as appropriate.
+  llvm::Type *layOutType(QualType Type);
 };
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp
index d44e531dacdff..6f6eb8beed5b9 100644
--- a/clang/lib/CodeGen/Targets/DirectX.cpp
+++ b/clang/lib/CodeGen/Targets/DirectX.cpp
@@ -88,10 +88,9 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
     if (ContainedTy.isNull() || !ContainedTy->isStructureType())
       return nullptr;
 
-    llvm::Type *BufferLayoutTy =
-        HLSLBufferLayoutBuilder(CGM, "dx.Layout")
-            .createLayoutType(ContainedTy->castAsCanonical<RecordType>(),
-                              Packoffsets);
+    llvm::StructType *BufferLayoutTy =
+        HLSLBufferLayoutBuilder(CGM).layOutStruct(
+            ContainedTy->getAsCanonical<RecordType>(), Packoffsets);
     if (!BufferLayoutTy)
       return nullptr;
 
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index abad3c78ea1a2..cd921a65e29d6 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -545,10 +545,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(
     if (ContainedTy.isNull() || !ContainedTy->isStructureType())
       return nullptr;
 
-    llvm::Type *BufferLayoutTy =
-        HLSLBufferLayoutBuilder(CGM, "spirv.Layout")
-            .createLayoutType(ContainedTy->castAsCanonical<RecordType>(),
-                              Packoffsets);
+    llvm::StructType *BufferLayoutTy =
+        HLSLBufferLayoutBuilder(CGM).layOutStruct(
+            ContainedTy->getAsCanonical<RecordType>(), Packoffsets);
     uint32_t StorageClass = /* Uniform storage class */ 2;
     return llvm::TargetExtType::get(Ctx, "spirv.VulkanBuffer", {BufferLayoutTy},
                                     {StorageClass, false});
diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
index aaa486eff10b7..0a71801a860d2 100644
--- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
+++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
@@ -5,18 +5,19 @@ struct S {
   float f;
 };
 
-// CHECK: [[CBLayout:%.*]] = type <{ [2 x float], [2 x <4 x i32>], [2 x [2 x i32]], [1 x target("dx.Layout", %S, 8, 0, 4)] }>
-// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", [[CBLayout]], 136, 0, 32, 64, 128))
-// CHECK: @c1 = external hidden addrspace(2) global [2 x float], align 4
+// CHECK: [[CBLayout:%.*]] = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), [2 x <4 x i32>], <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, target("dx.Padding", 12), <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }> }>
+
+// CHECK: @CBArrays.cb = global target("dx.CBuffer", [[CBLayout]])
+// CHECK: @c1 = external hidden addrspace(2) global <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
 // CHECK: @c2 = external hidden addrspace(2) global [2 x <4 x i32>], align 16
-// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x i32]], align 4
-// CHECK: @c4 = external hidden addrspace(2) global [1 x target("dx.Layout", %S, 8, 0, 4)], align 1
+// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, align 4
+// CHECK: @c4 = external hidden addrspace(2) global <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, align 1
 
 cbuffer CBArrays : register(b0) {
   float c1[2];
   int4 c2[2];
   int c3[2][2];
-  S c4[1];
+  S c4[2];
 }
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign1
@@ -140,40 +141,71 @@ void arr_assign7() {
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign8
 // CHECK: [[C:%.*]] = alloca [2 x float], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c1, i32 8, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0
+// CHECK-NEXT: [[L0:%.*]] = load float, ptr addrspace(2) @c1, align 4
+// CHECK-NEXT: store float [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, ptr addrspace(2) @c1, i32 0, i32 1), align 4
+// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
 // CHECK-NEXT: ret void
 void arr_assign8() {
-  float C[2] = {1.0, 2.0};
+  float C[2];
   C = c1;
 }
 
+// TODO: A memcpy would actually be valid here, since everything is aligned on
+// 16 byte boundaries.
+//
 // CHECK-LABEL: define hidden void {{.*}}arr_assign9
 // CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[C]], ptr align 16 {{.*}}, i32 32, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0
+// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16
+// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16
+// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16
 // CHECK-NEXT: ret void
 void arr_assign9() {
-  int4 C[2] = {1,2,3,4,5,6,7,8};
+  int4 C[2];
   C = c2;
 }
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign10
 // CHECK: [[C:%.*]] = alloca [2 x [2 x i32]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 16, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c3, i32 16, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c3, align 4
+// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 0, i32 0, i32 1), align 4
+// CHECK-NEXT: store i32 [[L1]], ptr [[V1]], align 4
+// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 0, i32 0), align 4
+// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 1
+// CHECK-NEXT: [[L3:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 1), align 4
+// CHECK-NEXT: store i32 [[L3]], ptr [[V3]], align 4
 // CHECK-NEXT: ret void
 void arr_assign10() {
-  int C[2][2] = {1,2,3,4};
+  int C[2][2];
   C = c3;
 }
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign11
-// CHECK: [[C:%.*]] = alloca [1 x %struct.S], align 1
-// CHECK: call void @llvm.memcpy.p0.p2.i32(ptr align 1 [[C]], ptr addrspace(2) align 1 @c4, i32 8, i1 false)
+// CHECK: [[C:%.*]] = alloca [2 x %struct.S], align 1
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c4, align 4
+// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 0, i32 0, i32 1), align 4
+// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
+// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 0), align 4
+// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 1
+// CHECK-NEXT: [[L3:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 1), align 4
+// CHECK-NEXT: store float [[L3]], ptr [[V3]], align 4
 // CHECK-NEXT: ret void
 void arr_assign11() {
-  S s = {1, 2.0};
-  S C[1] = {s};
+  S C[2];
   C = c4;
 }
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index b36682e065b3a..df82e8201ba55 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -37,9 +37,9 @@ void main(unsigned GI : SV_GroupIndex) {}
 // INLINE-NEXT:   alloca
 // INLINE-NEXT:   store i32 12
 // INLINE-NEXT:   store i32 13
-// INLINE-NEXT:   %[[HANDLE:.*]] = call target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0))
-// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_$Globalss_4_0tt"(i32 0, i32 0, i32 1, i32 0, i1 false)
-// INLINE-NEXT:   store target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) %[[HANDLE]], ptr @"$Globals.cb", align 4
+// INLINE-NEXT:   %[[HANDLE:.*]] = call target("dx.CBuffer", %"__cblayout_$Globals")
+// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_$Globalsst"(i32 0, i32 0, i32 1, i32 0, i1 false, ptr @"$Globals.str")
+// INLINE-NEXT:   store target("dx.CBuffer", %"__cblayout_$Globals") %[[HANDLE]], ptr @"$Globals.cb", align 4
 // INLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
 // INLINE-NEXT:   store i32 %
 // INLINE-NEXT:   store i32 0
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
index 8dcff5dad9d13..ab37ae0bc4e68 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
@@ -1,37 +1,121 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 
-// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }>
-// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }>
-// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
-// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8),
-// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)],
-// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }>
+// CHECK: %__cblayout_CBScalars = type <{
+// CHECK-SAME:   float, target("dx.Padding", 4), double,
+// CHECK-SAME:   half, target("dx.Padding", 6), i64,
+// CHECK-SAME:   i32, i16, target("dx.Padding", 2), i32, target("dx.Padding", 4),
+// CHECK-SAME:   i64
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBVectors = type <{
+// CHECK-SAME:   <3 x float>, target("dx.Padding", 4),
+// CHECK-SAME:   <3 x double>, <2 x half>, target("dx.Padding", 4),
+// CHECK-SAME:   <3 x i64>, target("dx.Padding", 8),
+// CHECK-SAME:   <4 x i32>,
+// CHECK-SAME:   <3 x i16>, target("dx.Padding", 10),
+// CHECK-SAME:   <3 x i64>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBArrays = type <{
+// CHECK-SAME: <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+// CHECK-SAME: <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [1 x <{
+// CHECK-SAME:   <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>],
+// CHECK-SAME:   <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>
+// CHECK-SAME: }>, target("dx.Padding", 14),
+// CHECK-SAME: <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+// CHECK-SAME: [2 x [3 x [4 x <4 x i32>]]]
+// CHECK-SAME: [1 x i16], target("dx.Padding", 14),
+// CHECK-SAME: <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBStructs = type <{
+// CHECK-SAME:   %A, target("dx.Padding", 8),
+
+// TODO: We should have target("dx.Padding", 2) padding after %B, but we don't correctly handle
+// 2- and 3-element vectors inside structs yet because of DataLayout rules.
+// CHECK-SAME: %B,
+
+// CHECK-SAME:   %C, target("dx.Padding", 8),
+// CHECK-SAME:   <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, target("dx.Padding", 8),
+// CHECK-SAME:   %__cblayout_D, half,
+// CHECK-SAME:   <3 x i16>
+// CHECK-SAME: }>
 
 // CHECK: %A = type <{ <2 x float> }>
 // CHECK: %B = type <{ <2 x float>, <3 x i16> }>
-// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }>
-// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }>
+// CHECK: %C = type <{ i32, target("dx.Padding", 12), %A }>
+
+// CHECK: %__cblayout_D = type <{
+// CHECK-SAME:   <{ [1 x <{
+// CHECK-SAME:     <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }>, target("dx.Padding", 2)
+// CHECK-SAME:   }>],
+// CHECK-SAME:   <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }> }>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBClasses = type <{
+// CHECK-SAME:   %K, target("dx.Padding", 12),
+// CHECK-SAME:   %L, target("dx.Padding", 8),
+// CHECK-SAME:   %M, target("dx.Padding", 12),
+// CHECK-SAME:   <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>
+// CHECK-SAME: }>
 
-// CHECK: %__cblayout_CBClasses = type <{ target("dx.Layout", %K, 4, 0), target("dx.Layout", %L, 8, 0, 4),
-// CHECK-SAME: target("dx.Layout", %M, 68, 0), [10 x target("dx.Layout", %K, 4, 0)] }>
 // CHECK: %K = type <{ float }>
 // CHECK: %L = type <{ float, float }>
-// CHECK: %M = type <{ [5 x target("dx.Layout", %K, 4, 0)] }>
-
-// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float,
-// CHECK-SAME: target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }>
+// CHECK: %M = type <{ <{ [4 x <{ %K, target("dx.Padding", 12) }>], %K }> }>
+
+// CHECK: %__cblayout_CBMix = type <{
+// CHECK-SAME:   <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, float, target("dx.Padding", 4)
+// CHECK-SAME:   <{ [2 x <{
+// CHECK-SAME:     <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>],
+// CHECK-SAME:     <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>
+// CHECK-SAME:   }>, float, target("dx.Padding", 4),
+// CHECK-SAME:   %anon, target("dx.Padding", 4), double,
+// CHECK-SAME:   %anon.0, float, target("dx.Padding", 4),
+// CHECK-SAME:   <1 x double>, i16
+// CHECK-SAME: }>
 
 // CHECK: %Test = type <{ float, float }>
 // CHECK: %anon = type <{ float }>
 // CHECK: %anon.0 = type <{ <2 x i32> }>
 
-// CHECK: %__cblayout_CB_A = type <{ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }>
-// CHECK: %__cblayout_CB_B = type <{ [3 x <3 x double>], <3 x half> }>
-// CHECK: %__cblayout_CB_C = type <{ i32, target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90), half, target("dx.Layout", %G, 258, 0, 48, 64, 256), double }>
-
-// CHECK: %F = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }>
-// CHECK: %G = type <{ target("dx.Layout", %E, 36, 0, 8, 16, 20, 22, 24, 32), [1 x float], [2 x target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)], half }>
-// CHECK: %E = type <{ float, double, float, half, i16, i64, i32 }>
+// CHECK: %__cblayout_CB_A = type <{
+// CHECK-SAME:   <{ [1 x <{ double, target("dx.Padding", 8) }>], double }>, target("dx.Padding", 8),
+// CHECK-SAME:   <{ [2 x <{ <3 x float>, target("dx.Padding", 4) }>], <3 x float> }>, float,
+// CHECK-SAME:   <{ [2 x <{ double, target("dx.Padding", 8) }>], double }>, half, target("dx.Padding", 6),
+// CHECK-SAME:   [1 x <2 x double>],
+// CHECK-SAME:   float, target("dx.Padding", 12),
+// CHECK-SAME:   <{ [1 x <{ <3 x half>, target("dx.Padding", 10) }>], <3 x half> }>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CB_B = type <{
+// CHECK-SAME: <{ [2 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CB_C = type <{
+// CHECK-SAME:   i32, target("dx.Padding", 12),
+// CHECK-SAME:   %F,
+// CHECK-SAME:   half, target("dx.Padding", 14),
+// CHECK-SAME:   %G, target("dx.Padding", 6), double
+// CHECK-SAME: }>
+
+// CHECK: %F = type <{
+// CHECK-SAME:   double, target("dx.Padding", 8),
+// CHECK-SAME:   <3 x float>, float,
+// CHECK-SAME:   <3 x double>, half, target("dx.Padding", 6),
+// CHECK-SAME:   <2 x double>,
+// CHECK-SAME:   float, <3 x half>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %G = type <{
+// CHECK-SAME:   %E, target("dx.Padding", 12),
+// CHECK-SAME:   [1 x float], target("dx.Padding", 12),
+// CHECK-SAME:   [2 x %F],
+// CHECK-SAME:   half
+// CHECK-SAME: }>
+
+// CHECK: %E = type <{ float, target("dx.Padding", 4), double, float, half, i16, i64, i32 }>
 
 cbuffer CBScalars : register(b1, space5) {
   float a1;
@@ -44,8 +128,7 @@ cbuffer CBScalars : register(b1, space5) {
   int64_t a8;
 }
 
-// CHECK: @CBScalars.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars,
-// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48))
+// CHECK: @CBScalars.cb = global target("dx.CBuffer", %__cblayout_CBScalars)
 // CHECK: @a1 = external hidden addrspace(2) global float, align 4
 // CHECK: @a2 = external hidden addrspace(2) global double, align 8
 // CHECK: @a3 = external hidden addrspace(2) global half, align 2
@@ -67,8 +150,7 @@ cbuffer CBVectors {
   // FIXME: add a bool vectors after llvm-project/llvm#91639 is added
 }
 
-// CHECK: @CBVectors.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors,
-// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112))
+// CHECK: @CBVectors.cb = global target("dx.CBuffer", %__cblayout_CBVectors)
 // CHECK: @b1 = external hidden addrspace(2) global <3 x float>, align 16
 // CHECK: @b2 = external hidden addrspace(2) global <3 x double>, align 32
 // CHECK: @b3 = external hidden addrspace(2) global <2 x half>, align 4
@@ -89,16 +171,15 @@ cbuffer CBArrays : register(b2) {
   bool c8[4];
 }
 
-// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays,
-// CHECK-SAME: 708, 0, 48, 112, 176, 224, 608, 624, 656))
-// CHECK: @c1 = external hidden addrspace(2) global [3 x float], align 4
-// CHECK: @c2 = external hidden addrspace(2) global [2 x <3 x double>], align 32
-// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x half]], align 2
-// CHECK: @c4 = external hidden addrspace(2) global [3 x i64], align 8
+// CHECK: @CBArrays.cb = global target("dx.CBuffer", %__cblayout_CBArrays)
+// CHECK: @c1 = external hidden addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
+// CHECK: @c2 = external hidden addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32
+// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], <{ [1 x <{ half, target("dx.Padding", 14) }>], half }> }>, align 2
+// CHECK: @c4 = external hidden addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
 // CHECK: @c5 = external hidden addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
 // CHECK: @c6 = external hidden addrspace(2) global [1 x i16], align 2
-// CHECK: @c7 = external hidden addrspace(2) global [2 x i64], align 8
-// CHECK: @c8 = external hidden addrspace(2) global [4 x i32], align 4
+// CHECK: @c7 = external hidden addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+// CHECK: @c8 = external hidden addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4
 // CHECK: @CBArrays.str = private unnamed_addr constant [9 x i8] c"CBArrays\00", align 1
 
 typedef uint32_t4 uint32_t8[2];
@@ -110,8 +191,7 @@ cbuffer CBTypedefArray : register(space2) {
   T2 t2[2];
 }
 
-// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray,
-// CHECK-SAME: 128, 0, 64))
+// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", %__cblayout_CBTypedefArray)
 // CHECK: @t1 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16
 // CHECK: @t2 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16
 // CHECK: @CBTypedefArray.str = private unnamed_addr constant [15 x i8] c"CBTypedefArray\00", align 1
@@ -135,13 +215,12 @@ struct D {
   Empty es;
 };
 
-// CHECK: @CBStructs.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs,
-// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240))
-// CHECK: @a = external hidden addrspace(2) global target("dx.Layout", %A, 8, 0), align 1
-// CHECK: @b = external hidden addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 1
-// CHECK: @c = external hidden addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 1
-// CHECK: @array_of_A = external hidden addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 1
-// CHECK: @d = external hidden addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 1
+// CHECK: @CBStructs.cb = global target("dx.CBuffer", %__cblayout_CBStructs)
+// CHECK: @a = external hidden addrspace(2) global %A, align 1
+// CHECK: @b = external hidden addrspace(2) global %B, align 1
+// CHECK: @c = external hidden addrspace(2) global %C, align 1
+// CHECK: @array_of_A = external hidden addrspace(2) global <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, align 1
+// CHECK: @d = external hidden addrspace(2) global %__cblayout_D, align 1
 // CHECK: @e = external hidden addrspace(2) global half, align 2
 // CHECK: @f = external hidden addrspace(2) global <3 x i16>, align 8
 // CHECK: @CBStructs.str = private unnamed_addr constant [10 x i8] c"CBStructs\00", align 1
@@ -176,27 +255,25 @@ cbuffer CBClasses {
   K ka[10];
 };
 
-// CHECK: @CBClasses.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses,
-// CHECK-SAME: 260, 0, 16, 32, 112))
-// CHECK: @k = external hidden addrspace(2) global target("dx.Layout", %K, 4, 0), align 1
-// CHECK: @l = external hidden addrspace(2) global target("dx.Layout", %L, 8, 0, 4), align 1
-// CHECK: @m = external hidden addrspace(2) global target("dx.Layout", %M, 68, 0), align 1
-// CHECK: @ka = external hidden addrspace(2) global [10 x target("dx.Layout", %K, 4, 0)], align 1
+// CHECK: @CBClasses.cb = global target("dx.CBuffer", %__cblayout_CBClasses)
+// CHECK: @k = external hidden addrspace(2) global %K, align 1
+// CHECK: @l = external hidden addrspace(2) global %L, align 1
+// CHECK: @m = external hidden addrspace(2) global %M, align 1
+// CHECK: @ka = external hidden addrspace(2) global <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>, align 1
 // CHECK: @CBClasses.str = private unnamed_addr constant [10 x i8] c"CBClasses\00", align 1
 
 struct Test {
     float a, b;
 };
 
-// CHECK: @CBMix.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix,
-// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
-// CHECK: @test = external hidden addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 1
+// CHECK: @CBMix.cb = global target("dx.CBuffer", %__cblayout_CBMix)
+// CHECK: @test = external hidden addrspace(2) global <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, align 1
 // CHECK: @f1 = external hidden addrspace(2) global float, align 4
-// CHECK: @f2 = external hidden addrspace(2) global [3 x [2 x <2 x float>]], align 8
+// CHECK: @f2 = external hidden addrspace(2) global <{ [2 x <{ <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>], <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> }>, align 8
 // CHECK: @f3 = external hidden addrspace(2) global float, align 4
-// CHECK: @f4 = external hidden addrspace(2) global target("dx.Layout", %anon, 4, 0), align 1
+// CHECK: @f4 = external hidden addrspace(2) global %anon, align 1
 // CHECK: @f5 = external hidden addrspace(2) global double, align 8
-// CHECK: @f6 = external hidden addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 1
+// CHECK: @f6 = external hidden addrspace(2) global %anon.0, align 1
 // CHECK: @f7 = external hidden addrspace(2) global float, align 4
 // CHECK: @f8 = external hidden addrspace(2) global <1 x double>, align 8
 // CHECK: @f9 = external hidden addrspace(2) global i16, align 2
@@ -215,7 +292,7 @@ cbuffer CBMix {
     uint16_t f9;
 };
 
-// CHECK: @CB_A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182))
+// CHECK: @CB_A.cb = global target("dx.CBuffer", %__cblayout_CB_A)
 
 cbuffer CB_A {
   double B0[2];
@@ -229,7 +306,7 @@ cbuffer CB_A {
   half3 B8;
 }
 
-// CHECK: @CB_B.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88))
+// CHECK: @CB_B.cb = global target("dx.CBuffer", %__cblayout_CB_B)
 cbuffer CB_B {
   double3 B9[3];
   half3 B10;
@@ -264,7 +341,7 @@ struct G {
   half C3;
 };
 
-// CHECK: @CB_C.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392))
+// CHECK: @CB_C.cb = global target("dx.CBuffer", %__cblayout_CB_C)
 cbuffer CB_C {
   int D0;
   F D1;
@@ -275,63 +352,63 @@ cbuffer CB_C {
 
 // CHECK: define internal void @_init_buffer_CBScalars.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48))
-// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
+// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", %__cblayout_CBScalars)
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBScalarsst(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBScalars) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBVectors.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBVectorss_136_0_16_40_48_80_96_112tt(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) %CBVectors.cb_h, ptr @CBVectors.cb, align 4
+// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", %__cblayout_CBVectors)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBVectorsst(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBVectors) %CBVectors.cb_h, ptr @CBVectors.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBArrays.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656))
-// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
+// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", %__cblayout_CBArrays)
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBArraysst(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBArrays) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBTypedefArray.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBTypedefArrays_128_0_64tt(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4
+// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", %__cblayout_CBTypedefArray)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBTypedefArrayst(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBTypedefArray) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBStructs.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   %CBStructs.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBStructss_246_0_16_32_64_144_238_240tt(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str)
-// CHECK-NEXT:   store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) %CBStructs.cb_h, ptr @CBStructs.cb, align 4
+// CHECK-NEXT:   %CBStructs.cb_h = call target("dx.CBuffer", %__cblayout_CBStructs)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBStructsst(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str)
+// CHECK-NEXT:   store target("dx.CBuffer", %__cblayout_CBStructs) %CBStructs.cb_h, ptr @CBStructs.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBClasses.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBClassess_260_0_16_32_112tt(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) %CBClasses.cb_h, ptr @CBClasses.cb, align 4
+// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", %__cblayout_CBClasses)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBClassesst(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBClasses) %CBClasses.cb_h, ptr @CBClasses.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBMix.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBMixs_170_0_24_32_120_128_136_144_152_160_168tt(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) %CBMix.cb_h, ptr @CBMix.cb, align 4
+// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", %__cblayout_CBMix)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBMixst(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBMix) %CBMix.cb_h, ptr @CBMix.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CB_A.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_As_188_0_32_76_80_120_128_144_160_182tt(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) %CB_A.cb_h, ptr @CB_A.cb, align 4
+// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", %__cblayout_CB_A)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Ast(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_A) %CB_A.cb_h, ptr @CB_A.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CB_B.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Bs_94_0_88tt(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) %CB_B.cb_h, ptr @CB_B.cb, align 4
+// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", %__cblayout_CB_B)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Bst(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_B) %CB_B.cb_h, ptr @CB_B.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CB_C.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Cs_400_0_16_112_128_392tt(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) %CB_C.cb_h, ptr @CB_C.cb, align 4
+// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", %__cblayout_CB_C)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Cst(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_C) %CB_C.cb_h, ptr @CB_C.cb, align 4
 
 RWBuffer<float> Buf;
 
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
index b7bdce32e6507..1fe0a68261c94 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
@@ -4,18 +4,18 @@
 
 // CHECK: %"n0::n1::__cblayout_A" = type <{ float }>
 // CHECK: %"n0::__cblayout_B" = type <{ float }>
-// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }>
+// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Padding", 12), %"n0::Foo" }>
 // CHECK: %"n0::Foo" = type <{ float }>
 
-// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0))
+// CHECK: @A.cb = global target("dx.CBuffer", %"n0::n1::__cblayout_A")
 // CHECK: @_ZN2n02n11aE = external hidden addrspace(2) global float, align 4
 
-// CHECK: @B.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0))
+// CHECK: @B.cb = global target("dx.CBuffer", %"n0::__cblayout_B")
 // CHECK: @_ZN2n01aE = external hidden addrspace(2) global float, align 4
 
-// CHECK: @C.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16))
+// CHECK: @C.cb = global target("dx.CBuffer", %"n0::n2::__cblayout_C")
 // CHECK: @_ZN2n02n21aE = external hidden addrspace(2) global float, align 4
-// CHECK: external hidden addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 1
+// CHECK: external hidden addrspace(2) global %"n0::Foo", align 1
 
 namespace n0 {
   struct Foo {
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
index 7bedd63c9f65d..c39fd75ec6ee4 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
@@ -2,10 +2,13 @@
 // RUN:   dxil-pc-shadermodel6.3-compute %s \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }>
-// CHECK: %__cblayout_CB_1 = type <{ float, <2 x float> }>
+// TODO: Reordering fields doesn't work...
+// XFAIL: *
 
-// CHECK: @CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK: %__cblayout_CB = type <{ [16 x i8], float, [68 x i8], <2 x i32>, [72 x i8], double }>
+// CHECK: %__cblayout_CB_1 = type <{ [80 x i8], <2 x float>, float }>
+
+// CHECK: @CB.cb = global target("dx.CBuffer", %__cblayout_CB)
 // CHECK: @a = external hidden addrspace(2) global float, align 4
 // CHECK: @b = external hidden addrspace(2) global double, align 8
 // CHECK: @c = external hidden addrspace(2) global <2 x i32>, align 8
@@ -17,7 +20,7 @@ cbuffer CB : register(b1, space3) {
   int2 c : packoffset(c5.z);
 }
 
-// CHECK: @CB.cb.1 = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_1, 92, 88, 80))
+// CHECK: @CB.cb.1 = global target("dx.CBuffer", %__cblayout_CB_1)
 // CHECK: @x = external hidden addrspace(2) global float, align 4
 // CHECK: @y = external hidden addrspace(2) global <2 x float>, align 8
 
@@ -30,7 +33,7 @@ cbuffer CB : register(b0) {
 
 // CHECK: define internal void @_init_buffer_CB.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB)
 // CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, ptr @CB.str)
 
 float foo() {
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
index fa3405df9e3d3..b8c7babb8d634 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
@@ -2,7 +2,7 @@
 
 // CHECK: %__cblayout_A = type <{ float }>
 
-// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0))
+// CHECK: @A.cb = global target("dx.CBuffer", %__cblayout_A)
 // CHECK: @a = external hidden addrspace(2) global float, align 4
 // CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4
 // CHECK-NOT: @B.cb
diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
index ad4d92f8afc02..5333dad962195 100644
--- a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
+++ b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
@@ -1,19 +1,18 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,DXIL
 // RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,SPIRV
 
-// DXIL: %"__cblayout_$Globals" = type <{ float, float, target("dx.Layout", %__cblayout_S, 4, 0) }>
-// SPIRV: %"__cblayout_$Globals" = type <{ float, float, target("spirv.Layout", %__cblayout_S, 4, 0) }>
+// CHECK: %"__cblayout_$Globals" = type <{ float, float, target("{{.*}}.Padding", 8), %__cblayout_S }>
 // CHECK: %__cblayout_S = type <{ float }>
 
-// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 20, 0, 4, 16))
+// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", %"__cblayout_$Globals")
 // DXIL-DAG: @a = external hidden addrspace(2) global float
 // DXIL-DAG: @g = external hidden addrspace(2) global float
-// DXIL-DAG: @h = external hidden addrspace(2) global target("dx.Layout", %__cblayout_S, 4, 0), align 4
+// DXIL-DAG: @h = external hidden addrspace(2) global %__cblayout_S, align 4
 
-// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 20, 0, 4, 16), 2, 0)
+// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0)
 // SPIRV-DAG: @a = external hidden addrspace(12) global float
 // SPIRV-DAG: @g = external hidden addrspace(12) global float
-// SPIRV-DAG: @h = external hidden addrspace(12) global target("spirv.Layout", %__cblayout_S, 4, 0), align 8
+// SPIRV-DAG: @h = external hidden addrspace(12) global %__cblayout_S, align 8
 
 struct EmptyStruct {
 };
diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
index 1b2cb0e99aa83..fee3b77c32dd2 100644
--- a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
+++ b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
@@ -13,6 +13,9 @@
 // CHECK-DAG: @e = external hidden addrspace(2) global <4 x float>, align 16
 // CHECK-DAG: @s = external hidden addrspace(2) global target("dx.Layout", %S, 8, 0), align 1
 
+// TODO: Reordering fields doesn't work...
+// XFAIL: *
+
 struct S {
   float2 v;
 };
diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
index f7fb886e7391d..2094ec61fcdb8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
@@ -53,7 +53,12 @@ static Instruction *findHandleDef(GlobalVariable *HandleVar) {
 }
 
 static bool replaceCBufferAccesses(Module &M) {
-  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+  std::optional<hlsl::CBufferMetadata> CBufMD =
+      hlsl::CBufferMetadata::get(M, [](Type *Ty) {
+        if (auto *TET = dyn_cast<TargetExtType>(Ty))
+          return TET->getName() == "spirv.Padding";
+        return false;
+      });
   if (!CBufMD)
     return false;
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index c6c618218006a..31d0dadaeb398 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -835,9 +835,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
         if (Ty->isArrayTy())
           Ty = Ty->getArrayElementType();
         else {
-          TargetExtType *BufferTy = cast<TargetExtType>(Ty);
-          assert(BufferTy->getTargetExtName() == "spirv.Layout");
-          Ty = BufferTy->getTypeParameter(0);
           assert(Ty && Ty->isStructTy());
           uint32_t Index = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
           Ty = cast<StructType>(Ty)->getElementType(Index);
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
index 4d32e66d017c9..cbc038329618c 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
@@ -21,7 +21,7 @@
 %MyStruct = type { <4 x float> }
 %__cblayout_MyCBuffer = type <{ %MyStruct, <4 x float> }>
 
- at MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) poison
+ at MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison
 @s = external hidden local_unnamed_addr addrspace(12) global %MyStruct, align 16
 @v = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16
 @MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1
@@ -30,10 +30,10 @@
 define void @main() {
 entry:
 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject %[[wrapper_ptr_t]] %[[MyCBuffer]]
-  %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_MyCBuffers_32_0_16t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8
+  %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8
   %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_3t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
-  
+
 ; CHECK: %[[tmp_ptr:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[tmp]] %[[uint_0]] %[[uint_0]]
 ; CHECK: %[[v_ptr:.+]] = OpAccessChain %[[_ptr_Uniform_v4float]] %[[tmp]] %[[uint_0]] %[[uint_1]]
 ; CHECK: %[[s_ptr_gep:[0-9]+]] = OpInBoundsAccessChain %[[_ptr_Uniform_float]] %[[tmp_ptr]] %[[uint_0]] %[[uint_1]]
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
index c365452a9b404..670548d3d3e27 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
@@ -14,12 +14,12 @@
 %__cblayout_PartiallyUsedCBuffer = type <{ float, i32 }>
 %__cblayout_AnotherCBuffer = type <{ <4 x float>, <4 x float> }>
 
- at UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) poison
+ at UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) poison
 @UnusedCBuffer.str = private unnamed_addr constant [14 x i8] c"UnusedCBuffer\00", align 1
- at PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) poison
+ at PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) poison
 @used_member = external hidden local_unnamed_addr addrspace(12) global float, align 4
 @PartiallyUsedCBuffer.str = private unnamed_addr constant [21 x i8] c"PartiallyUsedCBuffer\00", align 1
- at AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) poison
+ at AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) poison
 @a = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16
 @AnotherCBuffer.str = private unnamed_addr constant [15 x i8] c"AnotherCBuffer\00", align 1
 @.str = private unnamed_addr constant [7 x i8] c"output\00", align 1
@@ -28,18 +28,18 @@
 ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none)
 define void @main() local_unnamed_addr #1 {
 entry:
-  %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_UnusedCBuffers_4_0t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8
+  %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8
 
 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[PartiallyUsedCBuffer]]
 ; CHECK: %[[used_member_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0:[0-9]+]]
-  %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_PartiallyUsedCBuffers_8_0_4t_2_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8
+  %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8
 
 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[AnotherCBuffer]]
 ; CHECK: %[[a_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0]]
-  %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_AnotherCBuffers_32_0_16t_2_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8
+  %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8
   %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32 3, i32 0, i32 1, i32 0, ptr nonnull @.str)
 
   %2 = load float, ptr addrspace(12) @used_member, align 4

>From 3aa9fd7f823423788dda77ebcc1ef292f6ae6b08 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Fri, 26 Sep 2025 15:00:52 -0700
Subject: [PATCH 4/4] [DirectX] Drop dx.Layout extension type

TODO: This must go in after the frontend changes
---
 llvm/include/llvm/Analysis/DXILResource.h     | 21 -----
 llvm/lib/Analysis/DXILResource.cpp            | 13 ---
 .../DirectX/Metadata/cbuffer-layouttype.ll    | 82 -------------------
 3 files changed, 116 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll

diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index c7aff167324e6..bcbca78504041 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -222,27 +222,6 @@ class AnyResourceExtType : public TargetExtType {
   }
 };
 
-/// The dx.Layout target extension type
-///
-/// `target("dx.Layout", <Type>, <size>, [offsets...])`
-class LayoutExtType : public TargetExtType {
-public:
-  LayoutExtType() = delete;
-  LayoutExtType(const LayoutExtType &) = delete;
-  LayoutExtType &operator=(const LayoutExtType &) = delete;
-
-  Type *getWrappedType() const { return getTypeParameter(0); }
-  uint32_t getSize() const { return getIntParameter(0); }
-  uint32_t getOffsetOfElement(int I) const { return getIntParameter(I + 1); }
-
-  static bool classof(const TargetExtType *T) {
-    return T->getName() == "dx.Layout";
-  }
-  static bool classof(const Type *T) {
-    return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T));
-  }
-};
-
 /// The dx.Padding target extension type
 ///
 /// `target("dx.Padding", NumBytes)`
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 6f19a68dcd194..14d9925b4297a 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -380,13 +380,6 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
       Name.append(CBufferName);
     }
 
-    // TODO: Remove this when we update the frontend to use explicit padding.
-    if (LayoutExtType *LayoutType =
-            dyn_cast<LayoutExtType>(RTy->getResourceType())) {
-      StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
-      return StructType::create(Ty->elements(), Name);
-    }
-
     return getOrCreateElementStruct(
         getTypeWithoutPadding(RTy->getResourceType()), Name);
   }
@@ -498,13 +491,7 @@ ResourceTypeInfo::UAVInfo ResourceTypeInfo::getUAV() const {
 
 uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const {
   assert(isCBuffer() && "Not a CBuffer");
-
   Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType();
-
-  // TODO: Remove this when we update the frontend to use explicit padding.
-  if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy))
-    return LayoutTy->getSize();
-
   return DL.getTypeAllocSize(ElTy);
 }
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
deleted file mode 100644
index 85952c9ae4e83..0000000000000
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; TODO: Remove this test once we've updated the frontend to use explicit
-; padding. The cbuffer-metadata.ll test covers the newer logic.
-
-; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
-; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
-; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
-
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.6-compute"
-
-%__cblayout_CB1 = type <{ float, i32, double, <2 x i32> }>
- at CB1.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16)) poison
- at CB1.str = private unnamed_addr constant [4 x i8] c"CB1\00", align 1
-
-%__cblayout_CB2 = type <{ float, double, float, half, i16, i64, i32 }>
- at CB2.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32)) poison
- at CB2.str = private unnamed_addr constant [4 x i8] c"CB2\00", align 1
-
-%__cblayout_MyConstants = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }>
- at MyConstants.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)) poison
- at MyConstants.str = private unnamed_addr constant [12 x i8] c"MyConstants\00", align 1
-
-; PRINT:; Resource Bindings:
-; PRINT-NEXT:;
-; PRINT-NEXT:; Name            Type  Format  Dim   ID    HLSL Bind  Count
-; PRINT-NEXT:; ----
-; PRINT-NEXT:; CB1          cbuffer      NA   NA  CB0          cb0     1
-; PRINT-NEXT:; CB2          cbuffer      NA   NA  CB1          cb1     1
-; PRINT-NEXT:; MyConstants  cbuffer      NA   NA  CB2  cb5,space15     1
-
-define void @test() #0 {
-
-  ; cbuffer CB1 : register(b0) {
-  ;   float a;
-  ;   int b;
-  ;   double c;
-  ;   int2 d;
-  ; }
-  %CB1.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16))
-            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr @CB1.str)
-  ; cbuffer CB2 : register(b0) {
-  ;   float a;
-  ;   double b;
-  ;   float c;
-  ;   half d;
-  ;   uint16_t e;
-  ;   int64_t f;
-  ;   int g;
-  ;}
-
-  %CB2.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32))
-            @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr @CB2.str)
-  ; cbuffer CB3 : register(b5) {
-  ;   double B0;
-  ;   float3 B1;
-  ;   float B2;
-  ;   double3 B3;
-  ;   half B4;
-  ;   double2 B5;
-  ;   float B6;
-  ;   half3 B7;
-  ;   half3 B8;
-  ; }
-  %CB3.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90))
-            @llvm.dx.resource.handlefrombinding(i32 15, i32 5, i32 1, i32 0, ptr @MyConstants.str)
-
-  ret void
-}
-
-attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
-
-; CHECK: @CB1 = external constant %CBuffer.CB1
-; CHECK: @CB2 = external constant %CBuffer.CB2
-; CHECK: @MyConstants = external constant %CBuffer.MyConstants
-
-; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]}
-
-; CHECK: [[ResList]] = !{null, null, [[CBList:[!][0-9]+]], null}
-; CHECK: [[CBList]] = !{![[CB1:[0-9]+]], ![[CB2:[0-9]+]], ![[MYCONSTANTS:[0-9]+]]}
-; CHECK: ![[CB1]] = !{i32 0, ptr @CB1, !"CB1", i32 0, i32 0, i32 1, i32 24, null}
-; CHECK: ![[CB2]] = !{i32 1, ptr @CB2, !"CB2", i32 0, i32 1, i32 1, i32 36, null}
-; CHECK: ![[MYCONSTANTS]] = !{i32 2, ptr @MyConstants, !"MyConstants", i32 15, i32 5, i32 1, i32 96, null}



More information about the cfe-commits mailing list