[clang] [llvm] Explicit types in cbuffer layouts (PR #156919)

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 28 10:25:13 PDT 2025


https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/156919

>From f6008b351cea1ff44e98d4e8ac6f442339830031 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 19 Aug 2025 13:23:42 -0600
Subject: [PATCH 1/8] [DirectX] Add 32- and 64-bit 3-element vectors to
 DataLayout

This explicitly adds two 3-element vectors to the DataLayout so that
they'll be element-aligned. We need to do this more generally for
vectors, but this unblocks some very common cases.

Workaround for #123968
---
 clang/lib/Basic/Targets/DirectX.h                 | 5 ++++-
 clang/test/CodeGenHLSL/basic-target.c             | 2 +-
 llvm/lib/TargetParser/TargetDataLayout.cpp        | 5 ++++-
 llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll | 8 ++++----
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h
index bd13c9ee0fd05..a21a593365773 100644
--- a/clang/lib/Basic/Targets/DirectX.h
+++ b/clang/lib/Basic/Targets/DirectX.h
@@ -64,8 +64,11 @@ class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo {
     NoAsmVariants = true;
     PlatformMinVersion = Triple.getOSVersion();
     PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
+    // TODO: We need to align vectors on the element size generally, but for now
+    // we hard code this for 3-element 32- and 64-bit vectors as a workaround.
+    // See https://github.com/llvm/llvm-project/issues/123968
     resetDataLayout("e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:"
-                    "32-f64:64-n8:16:32:64");
+                    "32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64");
     TheCXXABI.set(TargetCXXABI::GenericItanium);
   }
   bool useFP16ConversionIntrinsics() const override { return false; }
diff --git a/clang/test/CodeGenHLSL/basic-target.c b/clang/test/CodeGenHLSL/basic-target.c
index c700e06bd5850..b9482df5a0987 100644
--- a/clang/test/CodeGenHLSL/basic-target.c
+++ b/clang/test/CodeGenHLSL/basic-target.c
@@ -6,5 +6,5 @@
 // RUN: %clang -cc1 -triple dxil-pc-shadermodel6.0-domain -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang -cc1 -triple dxil-pc-shadermodel6.0-geometry -emit-llvm -o - %s | FileCheck %s
 
-// CHECK: target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+// CHECK: target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
 // CHECK: target triple = "dxilv1.0-pc-shadermodel6.0-{{[a-z]+}}"
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index cea246e9527bd..5e6f77316780e 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -548,8 +548,11 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
   case Triple::csky:
     return computeCSKYDataLayout(*this);
   case Triple::dxil:
+    // TODO: We need to align vectors on the element size generally, but for now
+    // we hard code this for 3-element 32- and 64-bit vectors as a workaround.
+    // See https://github.com/llvm/llvm-project/issues/123968
     return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
-           "f32:32-f64:64-n8:16:32:64";
+           "f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64";
   case Triple::hexagon:
     return "e-m:e-p:32:32:32-a:0-n16:32-"
            "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
index a78fdd5037f93..f1486f974fb36 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
@@ -74,7 +74,7 @@ entry:
 ; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
 ; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
 ; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
+; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
 ; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
 ; CHECK:    [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
 ; CHECK:    [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
@@ -83,9 +83,9 @@ entry:
 ; CHECK:    [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
 ; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
 ; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 32
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
-  call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 64, i1 false)
+; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24
+; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
+  call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false)
 
 ; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
 ; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)

>From b0b252904e5c0d204304c73b24164c5d3a2de1a0 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Thu, 7 Aug 2025 15:02:49 -0700
Subject: [PATCH 2/8] [DirectX] Introduce `dx.Padding` type

This introduces the `dx.Padding` type as an alternative to the
`dx.Layout` types that are currently used for cbuffers. Later, we'll
remove the `dx.Layout` types completely, but making the backend handle
either makes it easier to stage the necessary changes to get there.

See #147352 for details.
---
 llvm/include/llvm/Analysis/DXILResource.h     | 19 ++++
 llvm/lib/Analysis/DXILResource.cpp            | 47 +++++++++-
 llvm/lib/IR/Type.cpp                          |  4 +
 .../DXILResource/buffer-frombinding.ll        |  4 +-
 .../DirectX/CBufferLoadLegacy-errors.ll       | 12 +--
 .../test/CodeGen/DirectX/CBufferLoadLegacy.ll | 12 +--
 .../ContainerData/PSVResources-order.ll       |  2 +-
 .../DirectX/ContainerData/PSVResources.ll     |  2 +-
 .../DirectX/CreateHandleFromBinding.ll        |  2 +-
 .../ForwardHandleAccesses/cbuffer-access.ll   | 20 ++---
 ...ffer_metadata.ll => cbuffer-layouttype.ll} |  3 +
 .../DirectX/Metadata/cbuffer-metadata.ll      | 89 +++++++++++++++++++
 .../CodeGen/DirectX/Metadata/cbuffer-only.ll  |  2 +-
 llvm/unittests/Analysis/DXILResourceTest.cpp  |  4 +-
 14 files changed, 187 insertions(+), 35 deletions(-)
 rename llvm/test/CodeGen/DirectX/Metadata/{cbuffer_metadata.ll => cbuffer-layouttype.ll} (96%)
 create mode 100644 llvm/test/CodeGen/DirectX/Metadata/cbuffer-metadata.ll

diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index 88ac0a11fe5a2..c7aff167324e6 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -243,6 +243,25 @@ class LayoutExtType : public TargetExtType {
   }
 };
 
+/// The dx.Padding target extension type
+///
+/// `target("dx.Padding", NumBytes)`
+class PaddingExtType : public TargetExtType {
+public:
+  PaddingExtType() = delete;
+  PaddingExtType(const PaddingExtType &) = delete;
+  PaddingExtType &operator=(const PaddingExtType &) = delete;
+
+  unsigned getNumBytes() const { return getIntParameter(0); }
+
+  static bool classof(const TargetExtType *T) {
+    return T->getName() == "dx.Padding";
+  }
+  static bool classof(const Type *T) {
+    return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T));
+  }
+};
+
 //===----------------------------------------------------------------------===//
 
 class ResourceTypeInfo {
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index b78cc03e34dbc..f9bf09262dd1f 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -281,6 +281,38 @@ static StructType *getOrCreateElementStruct(Type *ElemType, StringRef Name) {
   return StructType::create(ElemType, Name);
 }
 
+static Type *getTypeWithoutPadding(Type *Ty) {
+  // Recursively remove padding from structures.
+  if (auto *ST = dyn_cast<StructType>(Ty)) {
+    LLVMContext &Ctx = Ty->getContext();
+    SmallVector<Type *> ElementTypes;
+    ElementTypes.reserve(ST->getNumElements());
+    for (Type *ElTy : ST->elements()) {
+      if (isa<PaddingExtType>(ElTy))
+        continue;
+      ElementTypes.push_back(getTypeWithoutPadding(ElTy));
+    }
+
+    // Handle explicitly padded cbuffer arrays like { [ n x paddedty ], ty }
+    if (ElementTypes.size() == 2)
+      if (auto *AT = dyn_cast<ArrayType>(ElementTypes[0]))
+        if (ElementTypes[1] == AT->getElementType())
+          return ArrayType::get(ElementTypes[1], AT->getNumElements() + 1);
+
+    // If we only have a single element, don't wrap it in a struct.
+    if (ElementTypes.size() == 1)
+      return ElementTypes[0];
+
+    return StructType::get(Ctx, ElementTypes, /*IsPacked=*/false);
+  }
+  // Arrays just need to have their element type adjusted.
+  if (auto *AT = dyn_cast<ArrayType>(Ty))
+    return ArrayType::get(getTypeWithoutPadding(AT->getElementType()),
+                          AT->getNumElements());
+  // Anything else should be good as is.
+  return Ty;
+}
+
 StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
   SmallString<64> TypeName;
 
@@ -334,14 +366,21 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
   }
   case ResourceKind::CBuffer: {
     auto *RTy = cast<CBufferExtType>(HandleTy);
-    LayoutExtType *LayoutType = cast<LayoutExtType>(RTy->getResourceType());
-    StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
     SmallString<64> Name = getResourceKindName(Kind);
     if (!CBufferName.empty()) {
       Name.append(".");
       Name.append(CBufferName);
     }
-    return StructType::create(Ty->elements(), Name);
+
+    // TODO: Remove this when we update the frontend to use explicit padding.
+    if (LayoutExtType *LayoutType =
+            dyn_cast<LayoutExtType>(RTy->getResourceType())) {
+      StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
+      return StructType::create(Ty->elements(), Name);
+    }
+
+    return getOrCreateElementStruct(
+        getTypeWithoutPadding(RTy->getResourceType()), Name);
   }
   case ResourceKind::Sampler: {
     auto *RTy = cast<SamplerExtType>(HandleTy);
@@ -454,10 +493,10 @@ uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const {
 
   Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType();
 
+  // TODO: Remove this when we update the frontend to use explicit padding.
   if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy))
     return LayoutTy->getSize();
 
-  // TODO: What should we do with unannotated arrays?
   return DL.getTypeAllocSize(ElTy);
 }
 
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 9db48e8f6a96b..0e9535d24a4cc 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1034,6 +1034,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
   }
 
   // DirectX resources
+  if (Name == "dx.Padding")
+    return TargetTypeInfo(
+        ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)),
+        TargetExtType::CanBeGlobal);
   if (Name.starts_with("dx."))
     return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::CanBeGlobal,
                           TargetExtType::CanBeLocal,
diff --git a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
index aeeb21ebb3201..ab1945d2921cc 100644
--- a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
+++ b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
@@ -150,7 +150,7 @@ define void @test_typedbuffer() {
   ; CHECK:   Kind: CBuffer
   ; CHECK:   CBuffer size: 4
 
-  %cb1 = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+  %cb1 = call target("dx.CBuffer", <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>)
      @llvm.dx.resource.handlefrombinding(i32 1, i32 8, i32 1, i32 0, ptr @Constants.str)
   ; CHECK: Resource [[CB1:[0-9]+]]:
   ; CHECK:   Name: Constants
@@ -161,7 +161,7 @@ define void @test_typedbuffer() {
   ; CHECK:     Size: 1
   ; CHECK:   Class: CBV
   ; CHECK:   Kind: CBuffer
-  ; CHECK:   CBuffer size: 4
+  ; CHECK:   CBuffer size: 36
 
   ; CHECK-NOT: Resource {{[0-9]+}}:
 
diff --git a/llvm/test/CodeGen/DirectX/CBufferLoadLegacy-errors.ll b/llvm/test/CodeGen/DirectX/CBufferLoadLegacy-errors.ll
index 71dcf11b9dc88..196560f551f5f 100644
--- a/llvm/test/CodeGen/DirectX/CBufferLoadLegacy-errors.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferLoadLegacy-errors.ll
@@ -11,11 +11,11 @@ declare void @f16_user(half)
 ; CHECK-SAME: in function four64
 ; CHECK-SAME: Type mismatch between intrinsic and DXIL op
 define void @four64() "hlsl.export" {
-  %buffer = call target("dx.CBuffer", target("dx.Layout", {double}, 8, 0))
+  %buffer = call target("dx.CBuffer", <{ double }>)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
 
   %load = call {double, double, double, double} @llvm.dx.resource.load.cbufferrow.4(
-      target("dx.CBuffer", target("dx.Layout", {double}, 8, 0)) %buffer,
+      target("dx.CBuffer", <{ double }>) %buffer,
       i32 0)
   %data = extractvalue {double, double, double, double} %load, 0
 
@@ -28,11 +28,11 @@ define void @four64() "hlsl.export" {
 ; CHECK-SAME: in function two32
 ; CHECK-SAME: Type mismatch between intrinsic and DXIL op
 define void @two32() "hlsl.export" {
-  %buffer = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+  %buffer = call target("dx.CBuffer", <{ float }>)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
 
   %load = call {float, float} @llvm.dx.resource.load.cbufferrow.2(
-      target("dx.CBuffer", target("dx.Layout", {float}, 4, 0)) %buffer,
+      target("dx.CBuffer", <{ float }>) %buffer,
       i32 0)
   %data = extractvalue {float, float} %load, 0
 
@@ -41,5 +41,5 @@ define void @two32() "hlsl.export" {
   ret void
 }
 
-declare { double, double, double, double } @llvm.dx.resource.load.cbufferrow.4.f64.f64.f64.f64.tdx.CBuffer_tdx.Layout_sl_f64s_8_0tt(target("dx.CBuffer", target("dx.Layout", { double }, 8, 0)), i32)
-declare { float, float } @llvm.dx.resource.load.cbufferrow.2.f32.f32.tdx.CBuffer_tdx.Layout_sl_f32s_4_0tt(target("dx.CBuffer", target("dx.Layout", { float }, 4, 0)), i32)
+declare { double, double, double, double } @llvm.dx.resource.load.cbufferrow.4.f64.f64.f64.f64.tdx.CBuffer_sl_f64st(target("dx.CBuffer", <{ double }>), i32)
+declare { float, float } @llvm.dx.resource.load.cbufferrow.2.f32.f32.tdx.CBuffer_sl_f32st(target("dx.CBuffer", <{ float }>), i32)
diff --git a/llvm/test/CodeGen/DirectX/CBufferLoadLegacy.ll b/llvm/test/CodeGen/DirectX/CBufferLoadLegacy.ll
index d6906516b716d..dd40aa8d8d31c 100644
--- a/llvm/test/CodeGen/DirectX/CBufferLoadLegacy.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferLoadLegacy.ll
@@ -8,12 +8,12 @@ declare void @f16_user(half)
 
 ; CHECK-LABEL: define void @loadf32
 define void @loadf32() {
-  %buffer = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+  %buffer = call target("dx.CBuffer", <{ float }>)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
 
   ; CHECK: [[DATA:%.*]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
   %load = call {float, float, float, float} @llvm.dx.resource.load.cbufferrow.4(
-      target("dx.CBuffer", target("dx.Layout", {float}, 4, 0)) %buffer,
+      target("dx.CBuffer", <{ float }>) %buffer,
       i32 0)
   %data = extractvalue {float, float, float, float} %load, 0
 
@@ -27,12 +27,12 @@ define void @loadf32() {
 ; CHECK-LABEL: define void @loadf64
 define void @loadf64() {
   %buffer = call
-      target("dx.CBuffer", target("dx.Layout", {double, double, double, double}, 64, 0, 8, 16, 24))
+      target("dx.CBuffer", <{ <4 x double> }>)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
 
   ; CHECK: [[DATA:%.*]] = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %{{.*}}, i32 1)
   %load = call {double, double} @llvm.dx.resource.load.cbufferrow.2(
-      target("dx.CBuffer", target("dx.Layout", {double, double, double, double}, 64, 0, 8, 16, 24)) %buffer,
+      target("dx.CBuffer", <{ <4 x double> }>) %buffer,
       i32 1)
   %data = extractvalue {double, double} %load, 1
 
@@ -46,12 +46,12 @@ define void @loadf64() {
 ; CHECK-LABEL: define void @loadf16
 define void @loadf16() {
   %buffer = call
-      target("dx.CBuffer", target("dx.Layout", {half}, 2, 0))
+      target("dx.CBuffer", <{ half }>)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
 
   ; CHECK: [[DATA:%.*]] = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %{{.*}}, i32 0)
   %load = call {half, half, half, half, half, half, half, half} @llvm.dx.resource.load.cbufferrow.8(
-      target("dx.CBuffer", target("dx.Layout", {half}, 2, 0)) %buffer,
+      target("dx.CBuffer", <{ half }>) %buffer,
       i32 0)
   %data = extractvalue {half, half, half, half, half, half, half, half} %load, 0
 
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources-order.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources-order.ll
index bcf82a67a55df..5cd67be1a4a28 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources-order.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources-order.ll
@@ -18,7 +18,7 @@ define void @main() #0 {
   %srv0 = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, ptr null)
-  %cbuf = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+  %cbuf = call target("dx.CBuffer", <{ float }>)
       @llvm.dx.resource.handlefrombinding(i32 3, i32 2, i32 1, i32 0, ptr null)
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
index bea03102e4ccf..a1beccab68f5d 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
@@ -14,7 +14,7 @@ define void @main() #0 {
 ; CHECK:          Kind:            CBuffer
 ; CHECK:          Flags:
 ; CHECK:            UsedByAtomic64:  false
-  %cbuf = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+  %cbuf = call target("dx.CBuffer", <{ float }>)
       @llvm.dx.resource.handlefrombinding(i32 3, i32 2, i32 1, i32 0, ptr null)
 
   ; ByteAddressBuffer Buf : register(t8, space1)
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
index 38f2de28dbe59..671fcef281314 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
@@ -72,7 +72,7 @@ define void @test_bindings() {
   ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 10, i32 1033 }) #[[#ATTR]]
 
   ; cbuffer cb0 : register(b0) { int4 i; float4 f; }
-  %cb0 = call target("dx.CBuffer", target("dx.Layout", {<4 x i32>, <4 x float>}, 32, 0, 16))
+  %cb0 = call target("dx.CBuffer", <{ <4 x i32>, <4 x float> }>)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[BUF6:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 2 }, i32 0, i1 false) #[[#ATTR]]
   ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF6]], %dx.types.ResourceProperties { i32 13, i32 32 }) #[[#ATTR]]
diff --git a/llvm/test/CodeGen/DirectX/ForwardHandleAccesses/cbuffer-access.ll b/llvm/test/CodeGen/DirectX/ForwardHandleAccesses/cbuffer-access.ll
index 26b157f35a3e4..d67486324d220 100644
--- a/llvm/test/CodeGen/DirectX/ForwardHandleAccesses/cbuffer-access.ll
+++ b/llvm/test/CodeGen/DirectX/ForwardHandleAccesses/cbuffer-access.ll
@@ -4,27 +4,27 @@
 %__cblayout_CB2 = type <{ float }>
 %struct.Scalars = type { float, i32, i32 }
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 12, 0, 4, 8)) poison
- at CB2.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 4, 0)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+ at CB2.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB2) poison
 
 define void @main() local_unnamed_addr #1 {
 entry:
   ; CHECK: [[CB:%.*]] = tail call target({{.*}}) @llvm.dx.resource.handlefrombinding
-  %h = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 12, 0, 4, 8)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 12, 0, 4, 8)) %h, ptr @CB.cb, align 4
+  %h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %h, ptr @CB.cb, align 4
   %_ZL3Out_h.i.i = tail call target("dx.RawBuffer", %struct.Scalars, 1, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK-NOT: load target({{.*}}), ptr @CB.cb
-  %cb = load target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 12, 0, 4, 8)), ptr @CB.cb, align 4
+  %cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
   ; CHECK: call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target({{.*}}) [[CB]], i32 0)
-  %0 = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4(target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 12, 0, 4, 8)) %cb, i32 0)
+  %0 = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4(target("dx.CBuffer", %__cblayout_CB) %cb, i32 0)
   %1 = extractvalue { float, float, float, float } %0, 0
   call void @llvm.dx.resource.store.rawbuffer(target("dx.RawBuffer", %struct.Scalars, 1, 0) %_ZL3Out_h.i.i, i32 0, i32 0, float %1)
-  
+
   ; CHECK: [[CB2:%.*]] = tail call target({{.*}}) @llvm.dx.resource.handlefromimplicitbinding
-  %h2 = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 4, 0)) @llvm.dx.resource.handlefromimplicitbinding(i32 100, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 4, 0)) %h2, ptr @CB2.cb, align 4
+  %h2 = tail call target("dx.CBuffer", %__cblayout_CB2) @llvm.dx.resource.handlefromimplicitbinding(i32 100, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB2) %h2, ptr @CB2.cb, align 4
   ; CHECK-NOT: load target({{.*}}), ptr @CB2.cb
-  %cb2 = load target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 4, 0)), ptr @CB2.cb, align 4
+  %cb2 = load target("dx.CBuffer", %__cblayout_CB2), ptr @CB2.cb, align 4
 
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
similarity index 96%
rename from llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll
rename to llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
index 7ba2ed2988312..6595e51992b48 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
@@ -1,3 +1,6 @@
+; TODO: Remove this test once we've updated the frontend to use explicit
+; padding. The cbuffer-metadata.ll test covers the newer logic.
+
 ; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
 ; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
 ; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-metadata.ll
new file mode 100644
index 0000000000000..6b90e17816991
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
+; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
+; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+%__cblayout_CB1 = type <{ float, i32, double, <2 x i32> }>
+ at CB1.cb = global target("dx.CBuffer", %__cblayout_CB1) poison
+ at CB1.str = private unnamed_addr constant [4 x i8] c"CB1\00", align 1
+
+%__cblayout_CB2 = type <{ float, target("dx.Padding", 4), double, float, half, i16, i64, i32 }>
+ at CB2.cb = global target("dx.CBuffer", %__cblayout_CB2) poison
+ at CB2.str = private unnamed_addr constant [4 x i8] c"CB2\00", align 1
+
+%__cblayout_MyConstants = type <{
+  double, target("dx.Padding", 8),
+  <3 x float>, float,
+  <3 x double>, half, target("dx.Padding", 6),
+  <2 x double>,
+  float, <3 x half>, <3 x half>
+}>
+ at MyConstants.cb = global target("dx.CBuffer", %__cblayout_MyConstants) poison
+ at MyConstants.str = private unnamed_addr constant [12 x i8] c"MyConstants\00", align 1
+
+; PRINT:; Resource Bindings:
+; PRINT-NEXT:;
+; PRINT-NEXT:; Name            Type  Format  Dim   ID    HLSL Bind  Count
+; PRINT-NEXT:; ----
+; PRINT-NEXT:; CB1          cbuffer      NA   NA  CB0          cb0     1
+; PRINT-NEXT:; CB2          cbuffer      NA   NA  CB1          cb1     1
+; PRINT-NEXT:; MyConstants  cbuffer      NA   NA  CB2  cb5,space15     1
+
+define void @test() #0 {
+
+  ; cbuffer CB1 : register(b0) {
+  ;   float a;
+  ;   int b;
+  ;   double c;
+  ;   int2 d;
+  ; }
+  %CB1.cb_h = call target("dx.CBuffer", %__cblayout_CB1)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr @CB1.str)
+
+  ; cbuffer CB2 : register(b0) {
+  ;   float a;
+  ;   double b;
+  ;   float c;
+  ;   half d;
+  ;   uint16_t e;
+  ;   int64_t f;
+  ;   int g;
+  ;}
+  %CB2.cb_h = call target("dx.CBuffer", %__cblayout_CB2)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr @CB2.str)
+
+  ; cbuffer CB3 : register(b5) {
+  ;   double B0;
+  ;   float3 B1;
+  ;   float B2;
+  ;   double3 B3;
+  ;   half B4;
+  ;   double2 B5;
+  ;   float B6;
+  ;   half3 B7;
+  ;   half3 B8;
+  ; }
+  %CB3.cb_h = call target("dx.CBuffer", %__cblayout_MyConstants)
+            @llvm.dx.resource.handlefrombinding(i32 15, i32 5, i32 1, i32 0, ptr @MyConstants.str)
+
+  ret void
+}
+
+attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
+
+; CHECK: %CBuffer.CB1 = type { { float, i32, double, <2 x i32> } }
+; CHECK: %CBuffer.CB2 = type { { float, double, float, half, i16, i64, i32 } }
+; CHECK: %CBuffer.MyConstants = type { { double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> } }
+
+; CHECK: @CB1 = external constant %CBuffer.CB1
+; CHECK: @CB2 = external constant %CBuffer.CB2
+; CHECK: @MyConstants = external constant %CBuffer.MyConstants
+
+; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]}
+
+; CHECK: [[ResList]] = !{null, null, [[CBList:[!][0-9]+]], null}
+; CHECK: [[CBList]] = !{![[CB1:[0-9]+]], ![[CB2:[0-9]+]], ![[MYCONSTANTS:[0-9]+]]}
+; CHECK: ![[CB1]] = !{i32 0, ptr @CB1, !"CB1", i32 0, i32 0, i32 1, i32 24, null}
+; CHECK: ![[CB2]] = !{i32 1, ptr @CB2, !"CB2", i32 0, i32 1, i32 1, i32 36, null}
+; CHECK: ![[MYCONSTANTS]] = !{i32 2, ptr @MyConstants, !"MyConstants", i32 15, i32 5, i32 1, i32 96, null}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-only.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-only.ll
index e2a1c09c13038..0b454c12a74a2 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-only.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-only.ll
@@ -7,7 +7,7 @@
 target triple = "dxil-pc-shadermodel6.6-compute"
 
 define void @cbuffer_is_only_binding() {
-  %cbuf = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+  %cbuf = call target("dx.CBuffer", <{ float }>)
       @llvm.dx.resource.handlefrombinding(i32 1, i32 8, i32 1, i32 0, ptr null)
   ; CHECK: %CBuffer = type { float }
 
diff --git a/llvm/unittests/Analysis/DXILResourceTest.cpp b/llvm/unittests/Analysis/DXILResourceTest.cpp
index ee37fad04f389..8c3a2134ae0d6 100644
--- a/llvm/unittests/Analysis/DXILResourceTest.cpp
+++ b/llvm/unittests/Analysis/DXILResourceTest.cpp
@@ -369,10 +369,8 @@ TEST(DXILResource, AnnotationsAndMetadata) {
   {
     StructType *CBufStruct =
         StructType::create(Context, {Floatx4Ty, Floatx4Ty}, "cb0");
-    TargetExtType *CBufLayoutType =
-        llvm::TargetExtType::get(Context, "dx.Layout", CBufStruct, {32, 0, 16});
     ResourceTypeInfo RTI(
-        llvm::TargetExtType::get(Context, "dx.CBuffer", CBufLayoutType));
+        llvm::TargetExtType::get(Context, "dx.CBuffer", CBufStruct));
     EXPECT_EQ(RTI.getResourceClass(), ResourceClass::CBuffer);
     EXPECT_EQ(RTI.getCBufferSize(DL), 32u);
     EXPECT_EQ(RTI.getResourceKind(), ResourceKind::CBuffer);

>From a832f680521203456c7b4f093e4cda41f5dbc7fb Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Thu, 18 Sep 2025 14:02:59 -0700
Subject: [PATCH 3/8] [AST] Give `CharUnits::operator%` a consistent type. NFC

Update the `operator%` overload that accepts `CharUnits` to return
`CharUnits` to match the other `operator%`. This is more logical than
returning an `int64` and cleans up users that want to continue to do
math with the result.

Many users of this were explicitly comparing against 0. I considered
updating these to compare against `CharUnits::Zero` or even introducing
an `explicit operator bool()`, but they all feel clearer if we update
them to use the existing `isMultipleOf()` function instead.
---
 clang/include/clang/AST/CharUnits.h         |  6 +++---
 clang/lib/AST/APValue.cpp                   |  2 +-
 clang/lib/AST/RecordLayoutBuilder.cpp       |  9 ++++-----
 clang/lib/CodeGen/CGAtomic.cpp              |  2 +-
 clang/lib/CodeGen/CGExprConstant.cpp        |  2 +-
 clang/lib/CodeGen/CGObjCMac.cpp             |  2 +-
 clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 12 ++++++------
 clang/lib/Sema/SemaChecking.cpp             |  2 +-
 clang/lib/StaticAnalyzer/Core/Store.cpp     |  2 +-
 9 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/AST/CharUnits.h b/clang/include/clang/AST/CharUnits.h
index c06354451dfbe..bd7457f1caeaa 100644
--- a/clang/include/clang/AST/CharUnits.h
+++ b/clang/include/clang/AST/CharUnits.h
@@ -141,7 +141,7 @@ namespace clang {
       /// Among other things, this promises that
       /// self.alignTo(N) will just return self.
       bool isMultipleOf(CharUnits N) const {
-        return (*this % N) == 0;
+        return (*this % N) == CharUnits::Zero();
       }
 
       // Arithmetic operators.
@@ -165,8 +165,8 @@ namespace clang {
       CharUnits operator% (QuantityType N) const {
         return CharUnits(Quantity % N);
       }
-      QuantityType operator% (const CharUnits &Other) const {
-        return Quantity % Other.Quantity;
+      CharUnits operator% (const CharUnits &Other) const {
+        return CharUnits(Quantity % Other.Quantity);
       }
       CharUnits operator+ (const CharUnits &Other) const {
         return CharUnits(Quantity + Other.Quantity);
diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp
index 7173c2a0e1a2a..2e1c8eb3726cf 100644
--- a/clang/lib/AST/APValue.cpp
+++ b/clang/lib/AST/APValue.cpp
@@ -784,7 +784,7 @@ void APValue::printPretty(raw_ostream &Out, const PrintingPolicy &Policy,
       if (!O.isZero()) {
         if (IsReference)
           Out << "*(";
-        if (S.isZero() || O % S) {
+        if (S.isZero() || !O.isMultipleOf(S)) {
           Out << "(char*)";
           S = CharUnits::One();
         }
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 43f4e070748bb..00b938bdf308d 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -2087,9 +2087,8 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
   if (InsertExtraPadding) {
     CharUnits ASanAlignment = CharUnits::fromQuantity(8);
     CharUnits ExtraSizeForAsan = ASanAlignment;
-    if (FieldSize % ASanAlignment)
-      ExtraSizeForAsan +=
-          ASanAlignment - CharUnits::fromQuantity(FieldSize % ASanAlignment);
+    if (!FieldSize.isMultipleOf(ASanAlignment))
+      ExtraSizeForAsan += ASanAlignment - (FieldSize % ASanAlignment);
     EffectiveFieldSize = FieldSize = FieldSize + ExtraSizeForAsan;
   }
 
@@ -2119,10 +2118,10 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
     if (RD->hasAttr<PackedAttr>() || !MaxFieldAlignment.isZero())
       if (FieldAlign < OriginalFieldAlign)
         if (D->getType()->isRecordType()) {
-          // If the offset is a multiple of the alignment of
+          // If the offset is not a multiple of the alignment of
           // the type, raise the warning.
           // TODO: Takes no account the alignment of the outer struct
-          if (FieldOffset % OriginalFieldAlign != 0)
+          if (!FieldOffset.isMultipleOf(OriginalFieldAlign))
             Diag(D->getLocation(), diag::warn_unaligned_access)
                 << Context.getCanonicalTagType(RD) << D->getName()
                 << D->getType();
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index eeb0fd6412946..4a3446abcc78f 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -880,7 +880,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   CharUnits MaxInlineWidth =
       getContext().toCharUnitsFromBits(MaxInlineWidthInBits);
   DiagnosticsEngine &Diags = CGM.getDiags();
-  bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
+  bool Misaligned = !Ptr.getAlignment().isMultipleOf(TInfo.Width);
   bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
   if (Misaligned) {
     Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index b44dd9ecc717e..6407afc3d9447 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -433,7 +433,7 @@ llvm::Constant *ConstantAggregateBuilder::buildFrom(
 
       // All remaining elements must be the same type.
       if (Elems[I]->getType() != CommonType ||
-          Offset(I) % ElemSize != 0) {
+          !Offset(I).isMultipleOf(ElemSize)) {
         CanEmitArray = false;
         break;
       }
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 60f30a1334d6d..dbcce9b86ad52 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -5367,7 +5367,7 @@ IvarLayoutBuilder::buildBitmap(CGObjCCommonMac &CGObjC,
 
     // Ignore scan requests that don't start at an even multiple of the
     // word size.  We can't encode them.
-    if ((beginOfScan % WordSize) != 0)
+    if (!beginOfScan.isMultipleOf(WordSize))
       continue;
 
     // Ignore scan requests that start before the instance start.
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 5f6136c917ac2..e9205c68c2812 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -369,11 +369,11 @@ void CGRecordLowering::lowerUnion(bool isNonVirtualBaseType) {
   appendPaddingBytes(LayoutSize - getSize(StorageType));
   // Set packed if we need it.
   const auto StorageAlignment = getAlignment(StorageType);
-  assert((Layout.getSize() % StorageAlignment == 0 ||
-          Layout.getDataSize() % StorageAlignment) &&
+  assert((Layout.getSize().isMultipleOf(StorageAlignment) ||
+          !Layout.getDataSize().isMultipleOf(StorageAlignment)) &&
          "Union's standard layout and no_unique_address layout must agree on "
          "packedness");
-  if (Layout.getDataSize() % StorageAlignment)
+  if (!Layout.getDataSize().isMultipleOf(StorageAlignment))
     Packed = true;
 }
 
@@ -977,7 +977,7 @@ void CGRecordLowering::determinePacked(bool NVBaseType) {
       continue;
     // If any member falls at an offset that it not a multiple of its alignment,
     // then the entire record must be packed.
-    if (Member.Offset % getAlignment(Member.Data))
+    if (!Member.Offset.isMultipleOf(getAlignment(Member.Data)))
       Packed = true;
     if (Member.Offset < NVSize)
       NVAlignment = std::max(NVAlignment, getAlignment(Member.Data));
@@ -985,12 +985,12 @@ void CGRecordLowering::determinePacked(bool NVBaseType) {
   }
   // If the size of the record (the capstone's offset) is not a multiple of the
   // record's alignment, it must be packed.
-  if (Members.back().Offset % Alignment)
+  if (!Members.back().Offset.isMultipleOf(Alignment))
     Packed = true;
   // If the non-virtual sub-object is not a multiple of the non-virtual
   // sub-object's alignment, it must be packed.  We cannot have a packed
   // non-virtual sub-object and an unpacked complete object or vise versa.
-  if (NVSize % NVAlignment)
+  if (!NVSize.isMultipleOf(NVAlignment))
     Packed = true;
   // Update the alignment of the sentinel.
   if (!Packed)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 39c3aa2243338..8633aaf8531d9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -15946,7 +15946,7 @@ void Sema::RefersToMemberWithReducedAlignment(
   }
 
   // Check if the synthesized offset fulfills the alignment.
-  if (Offset % ExpectedAlignment != 0 ||
+  if (!Offset.isMultipleOf(ExpectedAlignment) ||
       // It may fulfill the offset it but the effective alignment may still be
       // lower than the expected expression alignment.
       CompleteObjectAlignment < ExpectedAlignment) {
diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp
index 971e6bc798837..b609f36ae7e89 100644
--- a/clang/lib/StaticAnalyzer/Core/Store.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Store.cpp
@@ -210,7 +210,7 @@ std::optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R,
           // Is the offset a multiple of the size?  If so, we can layer the
           // ElementRegion (with elementType == PointeeTy) directly on top of
           // the base region.
-          if (off % pointeeTySize == 0) {
+          if (off.isMultipleOf(pointeeTySize)) {
             newIndex = off / pointeeTySize;
             newSuperR = baseR;
           }

>From 9d3ee226f0685d1ff8bf5e6c52966ce5246f6a57 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 19 Aug 2025 13:46:38 -0600
Subject: [PATCH 4/8] [DirectX] Make a test a bit more readable. NFC

CHECK-lines ignore whitespace, so we can remove some here and make this
a bit easier to read.
---
 .../CodeGen/DirectX/Metadata/cbuffer-layouttype.ll     | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
index 6595e51992b48..85952c9ae4e83 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
@@ -22,11 +22,11 @@ target triple = "dxil-pc-shadermodel6.6-compute"
 
 ; PRINT:; Resource Bindings:
 ; PRINT-NEXT:;
-; PRINT-NEXT:; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; PRINT-NEXT:; CB1                               cbuffer      NA          NA     CB0            cb0     1
-; PRINT-NEXT:; CB2                               cbuffer      NA          NA     CB1            cb1     1
-; PRINT-NEXT:; MyConstants                       cbuffer      NA          NA     CB2    cb5,space15     1
+; PRINT-NEXT:; Name            Type  Format  Dim   ID    HLSL Bind  Count
+; PRINT-NEXT:; ----
+; PRINT-NEXT:; CB1          cbuffer      NA   NA  CB0          cb0     1
+; PRINT-NEXT:; CB2          cbuffer      NA   NA  CB1          cb1     1
+; PRINT-NEXT:; MyConstants  cbuffer      NA   NA  CB2  cb5,space15     1
 
 define void @test() #0 {
 

>From 5797e1849d99d0814f1dbbf16920eb92a28201b2 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 9 Sep 2025 14:08:37 -0700
Subject: [PATCH 5/8] [DirectX] Fix crash when naming buffers of arrays

DXILResource was falling over trying to name a resource type that
contained an array, such as `StructuredBuffer<float[3][2]>`. Handle this
by walking through array types to gather the dimensions.
---
 llvm/lib/Analysis/DXILResource.cpp                     | 8 ++++++++
 llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll | 9 ++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index f9bf09262dd1f..6f19a68dcd194 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -255,6 +255,12 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name,
   if (!ContainedType)
     return;
 
+  SmallVector<uint64_t> ArrayDimensions;
+  while (ArrayType *AT = dyn_cast<ArrayType>(ContainedType)) {
+    ArrayDimensions.push_back(AT->getNumElements());
+    ContainedType = AT->getElementType();
+  }
+
   StringRef ElementName;
   ElementType ET = toDXILElementType(ContainedType, IsSigned);
   if (ET != ElementType::Invalid) {
@@ -271,6 +277,8 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name,
   DestStream << "<" << ElementName;
   if (const FixedVectorType *VTy = dyn_cast<FixedVectorType>(ContainedType))
     DestStream << VTy->getNumElements();
+  for (uint64_t Dim : ArrayDimensions)
+    DestStream << "[" << Dim << "]";
   DestStream << ">";
 }
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
index 4f13f4789cd66..56798c8382d45 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
@@ -28,6 +28,11 @@ define void @test() {
       @llvm.dx.resource.handlefrombinding(i32 0, i32 10, i32 1, i32 0, ptr @SB.str)
   ; CHECK: %"StructuredBuffer<struct.S>" = type { %struct.S }
 
+  ; StructuredBuffer<float[3][2]>
+  %struct1 = call target("dx.RawBuffer", [3 x [2 x float]], 0, 0)
+      @llvm.dx.resource.handlefrombinding(i32 0, i32 12, i32 1, i32 0, ptr null)
+  ; CHECK: %"StructuredBuffer<float[3][2]>" = type { [3 x [2 x float]] }
+
   ; ByteAddressBuffer
   %byteaddr = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.resource.handlefrombinding(i32 0, i32 20, i32 1, i32 0, ptr null)
@@ -40,12 +45,14 @@ define void @test() {
 ; CHECK-NEXT: @[[T1:.*]] = external constant %"Buffer<int32_t>"
 ; CHECK-NEXT: @[[T2:.*]] = external constant %"Buffer<uint32_t3>"
 ; CHECK-NEXT: @[[S0:.*]] = external constant %"StructuredBuffer<struct.S>"
+; CHECK-NEXT: @[[S1:.*]] = external constant %"StructuredBuffer<float[3][2]>"
 ; CHECK-NEXT: @[[B0:.*]] = external constant %ByteAddressBuffer
 
 ; CHECK: !{i32 0, ptr @[[T0]], !"A"
 ; CHECK: !{i32 1, ptr @[[T1]], !""
 ; CHECK: !{i32 2, ptr @[[T2]], !""
 ; CHECK: !{i32 3, ptr @[[S0]], !"SB"
-; CHECK: !{i32 4, ptr @[[B0]], !""
+; CHECK: !{i32 4, ptr @[[S1]], !""
+; CHECK: !{i32 5, ptr @[[B0]], !""
 
 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }

>From 030d8cb65d4b438c1d3098c6d5efaadbc800db4d Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 11 Jun 2025 16:21:41 -0400
Subject: [PATCH 6/8] wip: Update DXILCBufferAccess to padding approach

---
 clang/lib/CodeGen/TargetInfo.h                |  14 ++
 clang/lib/CodeGen/Targets/DirectX.cpp         |  13 ++
 clang/lib/CodeGen/Targets/SPIR.cpp            |  14 ++
 llvm/include/llvm/Frontend/HLSL/CBuffer.h     |   6 +-
 llvm/lib/Frontend/HLSL/CBuffer.cpp            |  35 ++-
 llvm/lib/IR/Type.cpp                          |   4 +
 llvm/lib/Target/DirectX/DXILCBufferAccess.cpp |  93 +-------
 .../DirectX/CBufferAccess/array-typedgep.ll   |  16 +-
 .../CodeGen/DirectX/CBufferAccess/arrays.ll   |  97 ++++----
 .../CodeGen/DirectX/CBufferAccess/float.ll    |   6 +-
 .../DirectX/CBufferAccess/gep-ce-two-uses.ll  |  18 +-
 .../CodeGen/DirectX/CBufferAccess/memcpy.ll   | 216 ------------------
 .../CodeGen/DirectX/CBufferAccess/scalars.ll  |  34 +--
 .../CodeGen/DirectX/CBufferAccess/vectors.ll  |  36 +--
 14 files changed, 179 insertions(+), 423 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll

diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index d0edae1295094..8b59fde4b4120 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -448,6 +448,20 @@ class TargetCodeGenInfo {
     return nullptr;
   }
 
+  virtual llvm::Type *
+  getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const {
+    return llvm::ArrayType::get(llvm::Type::getInt8Ty(CGM.getLLVMContext()),
+                                NumBytes.getQuantity());
+  }
+
+  virtual bool isHLSLPadding(llvm::Type *Ty) const {
+    // TODO: Do we actually want to default these functions like this?
+    if (auto *AT = dyn_cast<llvm::ArrayType>(Ty))
+      if (AT->getElementType() == llvm::Type::getInt8Ty(Ty->getContext()))
+        return true;
+    return false;
+  }
+
   // Set the Branch Protection Attributes of the Function accordingly to the
   // BPI. Remove attributes that contradict with current BPI.
   static void
diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp
index b4cebb9a32aca..d44e531dacdff 100644
--- a/clang/lib/CodeGen/Targets/DirectX.cpp
+++ b/clang/lib/CodeGen/Targets/DirectX.cpp
@@ -32,6 +32,19 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo {
   llvm::Type *
   getHLSLType(CodeGenModule &CGM, const Type *T,
               const SmallVector<int32_t> *Packoffsets = nullptr) const override;
+
+  llvm::Type *getHLSLPadding(CodeGenModule &CGM,
+                             CharUnits NumBytes) const override {
+    unsigned Size = NumBytes.getQuantity();
+    return llvm::TargetExtType::get(CGM.getLLVMContext(), "dx.Padding", {},
+                                    {Size});
+  }
+
+  bool isHLSLPadding(llvm::Type *Ty) const override {
+    if (auto *TET = dyn_cast<llvm::TargetExtType>(Ty))
+      return TET->getName() == "dx.Padding";
+    return false;
+  }
 };
 
 llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 2e3fc53c58edc..a945f203c15e5 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -56,6 +56,20 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
   llvm::Type *
   getHLSLType(CodeGenModule &CGM, const Type *Ty,
               const SmallVector<int32_t> *Packoffsets = nullptr) const override;
+
+  llvm::Type *
+  getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const override {
+    unsigned Size = NumBytes.getQuantity();
+    return llvm::TargetExtType::get(CGM.getLLVMContext(), "spirv.Padding", {},
+                                    {Size});
+  }
+
+  bool isHLSLPadding(llvm::Type *Ty) const override {
+    if (auto *TET = dyn_cast<llvm::TargetExtType>(Ty))
+      return TET->getName() == "spirv.Padding";
+    return false;
+  }
+
   llvm::Type *getSPIRVImageTypeFromHLSLResource(
       const HLSLAttributedResourceType::Attributes &attributes,
       QualType SampledType, CodeGenModule &CGM) const;
diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
index 694a7fa854576..f4e232ffe1745 100644
--- a/llvm/include/llvm/Frontend/HLSL/CBuffer.h
+++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
@@ -46,7 +46,8 @@ class CBufferMetadata {
   CBufferMetadata(NamedMDNode *MD) : MD(MD) {}
 
 public:
-  static std::optional<CBufferMetadata> get(Module &M);
+  static std::optional<CBufferMetadata>
+  get(Module &M, llvm::function_ref<bool(Type *)> IsPadding);
 
   using iterator = SmallVector<CBufferMapping>::iterator;
   iterator begin() { return Mappings.begin(); }
@@ -55,9 +56,6 @@ class CBufferMetadata {
   void eraseFromModule();
 };
 
-APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
-                               ArrayType *Ty);
-
 } // namespace hlsl
 } // namespace llvm
 
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index 407b6ad6d5a7e..329357ac8f08e 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -15,25 +15,28 @@
 using namespace llvm;
 using namespace llvm::hlsl;
 
-static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) {
+static SmallVector<size_t>
+getMemberOffsets(const DataLayout &DL, GlobalVariable *Handle,
+                 llvm::function_ref<bool(Type *)> IsPadding) {
+  SmallVector<size_t> Offsets;
+
   auto *HandleTy = cast<TargetExtType>(Handle->getValueType());
   assert((HandleTy->getName().ends_with(".CBuffer") ||
           HandleTy->getName() == "spirv.VulkanBuffer") &&
          "Not a cbuffer type");
   assert(HandleTy->getNumTypeParameters() == 1 && "Expected layout type");
+  auto *LayoutTy = cast<StructType>(HandleTy->getTypeParameter(0));
 
-  auto *LayoutTy = cast<TargetExtType>(HandleTy->getTypeParameter(0));
-  assert(LayoutTy->getName().ends_with(".Layout") && "Not a layout type");
-
-  // Skip the "size" parameter.
-  size_t ParamIndex = Index + 1;
-  assert(LayoutTy->getNumIntParameters() > ParamIndex &&
-         "Not enough parameters");
+  const StructLayout *SL = DL.getStructLayout(LayoutTy);
+  for (int I = 0, E = LayoutTy->getNumElements(); I < E; ++I)
+    if (!IsPadding(LayoutTy->getElementType(I)))
+      Offsets.push_back(SL->getElementOffset(I));
 
-  return LayoutTy->getIntParameter(ParamIndex);
+  return Offsets;
 }
 
-std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
+std::optional<CBufferMetadata>
+CBufferMetadata::get(Module &M, llvm::function_ref<bool(Type *)> IsPadding) {
   NamedMDNode *CBufMD = M.getNamedMetadata("hlsl.cbs");
   if (!CBufMD)
     return std::nullopt;
@@ -47,13 +50,16 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
         cast<ValueAsMetadata>(MD->getOperand(0))->getValue());
     CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle);
 
+    SmallVector<size_t> MemberOffsets =
+        getMemberOffsets(M.getDataLayout(), Handle, IsPadding);
+
     for (int I = 1, E = MD->getNumOperands(); I < E; ++I) {
       Metadata *OpMD = MD->getOperand(I);
       // Some members may be null if they've been optimized out.
       if (!OpMD)
         continue;
       auto *V = cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
-      Mapping.Members.emplace_back(V, getMemberOffset(Handle, I - 1));
+      Mapping.Members.emplace_back(V, MemberOffsets[I - 1]);
     }
   }
 
@@ -64,10 +70,3 @@ void CBufferMetadata::eraseFromModule() {
   // Remove the cbs named metadata
   MD->eraseFromParent();
 }
-
-APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
-                                     ArrayType *Ty) {
-  int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8;
-  int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes));
-  return Offset.udiv(TypeSize) * RoundUp;
-}
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 0e9535d24a4cc..b060a6c4f3bf6 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1008,6 +1008,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
   }
   if (Name == "spirv.IntegralConstant" || Name == "spirv.Literal")
     return TargetTypeInfo(Type::getVoidTy(C));
+  if (Name == "spirv.Padding")
+    return TargetTypeInfo(
+        ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)),
+        TargetExtType::CanBeGlobal);
   if (Name.starts_with("spirv."))
     return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit,
                           TargetExtType::CanBeGlobal,
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index 44277971acd60..e6ba437ce417e 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -8,6 +8,7 @@
 
 #include "DXILCBufferAccess.h"
 #include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
 #include "llvm/Frontend/HLSL/CBuffer.h"
 #include "llvm/Frontend/HLSL/HLSLResource.h"
 #include "llvm/IR/IRBuilder.h"
@@ -97,10 +98,6 @@ struct CBufferResource {
       (void)Success;
       assert(Success && "Offsets into cbuffer globals must be constant");
 
-      if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
-        ConstantOffset =
-            hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
-
       return ConstantOffset.getZExtValue();
     }
 
@@ -194,88 +191,14 @@ static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
   DeadInsts.push_back(LI);
 }
 
-/// This function recursively copies N array elements from the cbuffer resource
-/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
-/// arrays into a sequence of scalar/vector extracts and stores.
-static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI,
-                                    CBufferResource &CBR, ArrayType *ArrTy,
-                                    size_t ArrOffset, size_t N,
-                                    const Twine &Name = "") {
-  const DataLayout &DL = MCI->getDataLayout();
-  Type *ElemTy = ArrTy->getElementType();
-  size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
-  for (unsigned I = 0; I < N; ++I) {
-    size_t Offset = ArrOffset + I * ElemTySize;
-
-    // Recursively copy nested arrays
-    if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
-      copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset,
-                              ElemArrTy->getNumElements(), Name);
-      continue;
-    }
-
-    // Load CBuffer value and store it in Dest
-    APInt CBufArrayOffset(
-        DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
-    CBufArrayOffset =
-        hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
-    Value *CBufferVal =
-        CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
-    Value *GEP =
-        Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
-                                  {Builder.getInt32(Offset)}, Name + ".dest");
-    Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
-  }
-}
-
-/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
-/// itself. Assumes the cbuffer global is an array, and the length of bytes to
-/// copy is divisible by array element allocation size.
-/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
-static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) {
-
-  ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
-  assert(ArrTy && "MemCpy lowering is only supported for array types");
-
-  // This assumption vastly simplifies the implementation
-  if (MCI->getSource() != CBR.Member)
-    reportFatalUsageError(
-        "Expected MemCpy source to be a cbuffer global variable");
-
-  ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
-  uint64_t ByteLength = Length->getZExtValue();
-
-  // If length to copy is zero, no memcpy is needed
-  if (ByteLength == 0) {
-    MCI->eraseFromParent();
-    return;
-  }
-
-  const DataLayout &DL = CBR.getDataLayout();
-
-  Type *ElemTy = ArrTy->getElementType();
-  size_t ElemSize = DL.getTypeAllocSize(ElemTy);
-  assert(ByteLength % ElemSize == 0 &&
-         "Length of bytes to MemCpy must be divisible by allocation size of "
-         "source/destination array elements");
-  size_t ElemsToCpy = ByteLength / ElemSize;
-
-  IRBuilder<> Builder(MCI);
-  CBR.createAndSetCurrentHandle(Builder);
-
-  copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy,
-                          "memcpy." + MCI->getDest()->getName() + "." +
-                              MCI->getSource()->getName());
-
-  MCI->eraseFromParent();
-}
-
 static void replaceAccessesWithHandle(CBufferResource &CBR) {
   SmallVector<WeakTrackingVH> DeadInsts;
 
   SmallVector<User *> ToProcess{CBR.users()};
   while (!ToProcess.empty()) {
     User *Cur = ToProcess.pop_back_val();
+    assert(!isa<MemCpyInst>(Cur) &&
+           "memcpy should have been removed in an earlier pass");
 
     // If we have a load instruction, replace the access.
     if (auto *LI = dyn_cast<LoadInst>(Cur)) {
@@ -283,13 +206,6 @@ static void replaceAccessesWithHandle(CBufferResource &CBR) {
       continue;
     }
 
-    // If we have a memcpy instruction, replace it with multiple accesses and
-    // subsequent stores to the destination
-    if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
-      replaceMemCpy(MCI, CBR);
-      continue;
-    }
-
     // Otherwise, walk users looking for a load...
     if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
       ToProcess.append(Cur->user_begin(), Cur->user_end());
@@ -302,7 +218,8 @@ static void replaceAccessesWithHandle(CBufferResource &CBR) {
 }
 
 static bool replaceCBufferAccesses(Module &M) {
-  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(
+      M, [](Type *Ty) { return isa<llvm::dxil::PaddingExtType>(Ty); });
   if (!CBufMD)
     return false;
 
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
index 52ad0f3df1aba..10ea19df1d177 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
@@ -3,24 +3,24 @@
 ; cbuffer CB : register(b0) {
 ;   float a1[3];
 ; }
-%__cblayout_CB = type <{ [3 x float] }>
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
 
- at CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+ at CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external addrspace(2) global [3 x float], align 4
+ at a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4
 
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4
+  %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds (<{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) @a1, i32 0, i32 0, i32 1), align 4
   store float %a1, ptr %dst, align 32
 
   ret void
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
index db4e14c1336a6..714df11a3a1fa 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
@@ -1,47 +1,60 @@
 ; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
 
+; TODO: Remove datalayout.
+; This hack forces dxil-compatible alignment of 3-element 32- and 64-bit vectors
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v96:32:32-v192:64:64"
+
 ; cbuffer CB : register(b0) {
-;   float a1[3];
-;   double3 a2[2];
-;   float16_t a3[2][2];
-;   uint64_t a4[3];
-;   int4 a5[2][3][4];
-;   uint16_t a6[1];
-;   int64_t a7[2];
-;   bool a8[4];
+;   float a1[3];        // offset   0,  size 4  (+12) * 3
+;   double3 a2[2];      // offset   48, size 24  (+8) * 2
+;   float16_t a3[2][2]; // offset  112, size  2 (+14) * 4
+;   uint64_t a4[3];     // offset  176, size  8  (+8) * 3
+;   int4 a5[2][3][4];   // offset  224, size 16       * 24
+;   uint16_t a6[1];     // offset  608, size  2 (+14) * 1
+;   int64_t a7[2];      // offset  624, size  8  (+8) * 2
+;   bool a8[4];         // offset  656, size  4 (+12) * 4
 ; }
-%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
+%__cblayout_CB = type <{
+  <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+  <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+  <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14),
+  <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+  [24 x <4 x i32>],
+  [1 x i16], target("dx.Padding", 14),
+  <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+  <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+}>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
- at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
- at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
- at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
- at a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
+ at a1 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
+ at a2 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32
+ at a3 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, align 2
+ at a4 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+ at a5 = external local_unnamed_addr addrspace(2) global [24 x <4 x i32>], align 16
 @a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
- at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
- at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
+ at a7 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+ at a8 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4
 
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
   store float %a1, ptr %dst, align 32
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
   ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
   ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
   ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
@@ -52,26 +65,26 @@ entry:
   %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
   store <3 x double> %a2, ptr %a2.i, align 32
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
   ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32
   ; CHECK: store half [[X]], ptr [[PTR]]
-  %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2
+  %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 16), align 2
   %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32
   store half %a3, ptr %a3.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12)
   ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
   ; CHECK: store i64 [[X]], ptr [[PTR]]
-  %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8
+  %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 16), align 8
   %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
   store i64 %a4, ptr %a4.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26)
   ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
@@ -82,12 +95,12 @@ entry:
   ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
   ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
-  %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4
+  %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 192), align 4
   %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
   store <4 x i32> %a5, ptr %a5.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 38)
   ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64
   ; CHECK: store i16 [[X]], ptr [[PTR]]
@@ -95,21 +108,21 @@ entry:
   %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64
   store i16 %a6, ptr %a6.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 40)
   ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
   ; CHECK: store i64 [[X]], ptr [[PTR]]
-  %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8
+  %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 16), align 8
   %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
   store i64 %a7, ptr %a7.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 42)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 42)
   ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80
   ; CHECK: store i32 [[X]], ptr [[PTR]]
-  %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2
+  %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 16), align 4, !range !1, !noundef !2
   %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80
   store i32 %a8, ptr %a8.i, align 4
 
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
index d7272b449166d..6f45fb6ae2e24 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
@@ -2,7 +2,7 @@
 
 %__cblayout_CB = type <{ float }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
 @x = external local_unnamed_addr addrspace(2) global float, align 4
@@ -10,8 +10,8 @@
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
   %x = load float, ptr addrspace(2) @x, align 4
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
index abe087dbe6100..21724d32f10d0 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
@@ -3,28 +3,28 @@
 ; cbuffer CB : register(b0) {
 ;   float a1[3];
 ; }
-%__cblayout_CB = type <{ [3 x float] }>
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+ at a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4
 
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
   store float %a1, ptr %dst, align 32
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[X]], ptr %dst
-  %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
   store float %a2, ptr %dst, align 32
 
   ret void
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
deleted file mode 100644
index f1486f974fb36..0000000000000
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
+++ /dev/null
@@ -1,216 +0,0 @@
-; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
-
-; cbuffer CB : register(b0) {
-;   float a1[3];
-;   double3 a2[2];
-;   float16_t a3[2][2];
-;   uint64_t a4[3];
-;   int2 a5[3][2];
-;   uint16_t a6[1];
-;   int64_t a7[2];
-;   bool a8[4];
-; }
-%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }>
-
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
- at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
- at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
- at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
- at a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16
- at a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
- at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
- at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
-
-; CHECK: define void @f(
-define void @f(ptr %dst) {
-entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4
-
-  %a1.copy = alloca [3 x float], align 4
-  %a2.copy = alloca [2 x <3 x double>], align 32
-  %a3.copy = alloca [2 x [2 x half]], align 2
-  %a4.copy = alloca [3 x i64], align 8
-  %a5.copy = alloca [3 x [2 x <2 x i32>]], align 16
-  %a6.copy = alloca [1 x i16], align 2
-  %a7.copy = alloca [2 x i64], align 8
-  %a8.copy = alloca [4 x i32], align 4
-
-  ; Try copying no elements
-; CHECK-NOT: memcpy
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 0, i1 false)
-
-  ; Try copying only the first element
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
-; CHECK:    [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
-; CHECK:    store float [[X]], ptr [[DEST]], align 4
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 4, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
-; CHECK:    [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
-; CHECK:    store float [[X]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
-; CHECK:    [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4
-; CHECK:    store float [[Y]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
-; CHECK:    [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8
-; CHECK:    store float [[Z]], ptr [[DEST]], align 4
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
-; CHECK:    [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
-; CHECK:    [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
-; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
-; CHECK:    [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
-; CHECK:    [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK:    [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
-; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
-; CHECK:    [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0
-; CHECK:    store half [[X]], ptr [[DEST]], align 2
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
-; CHECK:    [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2
-; CHECK:    store half [[Y]], ptr [[DEST]], align 2
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9)
-; CHECK:    [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4
-; CHECK:    store half [[X]], ptr [[DEST]], align 2
-; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10)
-; CHECK:    [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6
-; CHECK:    store half [[Y]], ptr [[DEST]], align 2
-  call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11)
-; CHECK:    [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0
-; CHECK:    store i64 [[X]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
-; CHECK:    [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8
-; CHECK:    store i64 [[Y]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13)
-; CHECK:    [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16
-; CHECK:    store i64 [[Z]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK:    [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK:    [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40
-; CHECK:    store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
-; CHECK:    [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0
-; CHECK:    store i16 [[X]], ptr [[DEST]], align 2
-  call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
-; CHECK:    [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0
-; CHECK:    store i64 [[X]], ptr [[DEST]], align 8
-; CHECK:    [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
-; CHECK:    [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8
-; CHECK:    store i64 [[Y]], ptr [[DEST]], align 8
-  call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false)
-
-; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20)
-; CHECK:    [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0
-; CHECK:    store i32 [[X]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21)
-; CHECK:    [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4
-; CHECK:    store i32 [[Y]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22)
-; CHECK:    [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8
-; CHECK:    store i32 [[Z]], ptr [[DEST]], align 4
-; CHECK:    [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23)
-; CHECK:    [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12
-; CHECK:    store i32 [[W]], ptr [[DEST]], align 4
-  call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false)
-
-  ret void
-}
-
-declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg)
-
-; CHECK-NOT: !hlsl.cbs =
-!hlsl.cbs = !{!0}
-
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
-!1 = !{i32 0, i32 2}
-!2 = !{}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
index 7857c25d69636..e53ba1c273240 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
@@ -11,7 +11,7 @@
 ; }
 %__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global float, align 4
@@ -25,18 +25,18 @@
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: store float [[A1]], ptr %dst
   %a1 = load float, ptr addrspace(2) @a1, align 4
   store float %a1, ptr %dst, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
   ; CHECK: store i32 [[A2]], ptr [[PTR]]
@@ -44,8 +44,8 @@ entry:
   %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
   store i32 %a2, ptr %a2.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
   ; CHECK: store i32 [[A3]], ptr [[PTR]]
@@ -53,8 +53,8 @@ entry:
   %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
   store i32 %a3, ptr %a3.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12
   ; CHECK: store half [[A4]], ptr [[PTR]]
@@ -62,8 +62,8 @@ entry:
   %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12
   store half %a4, ptr %a4.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14
   ; CHECK: store i16 [[A5]], ptr [[PTR]]
@@ -71,8 +71,8 @@ entry:
   %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14
   store i16 %a5, ptr %a5.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
   ; CHECK: store double [[A6]], ptr [[PTR]]
@@ -80,8 +80,8 @@ entry:
   %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
   store double %a6, ptr %a6.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
   ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24
   ; CHECK: store i64 [[A7]], ptr [[PTR]]
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
index 4160008a986af..e99c77dbe2710 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
@@ -8,9 +8,9 @@
 ;   int4 a5;       // offset  80, size 16
 ;   uint16_t3 a6;  // offset  96, size  6 (+10)
 ; };
-%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }>
+%__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16> }>
 
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
 ; CHECK: @CB.cb =
 ; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16
@@ -23,11 +23,11 @@
 ; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
   ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2
@@ -38,11 +38,11 @@ entry:
   %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16
   store <3 x float> %a1, ptr %dst, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
   ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
   ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
   ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
   ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
@@ -53,8 +53,8 @@ entry:
   %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
   store <3 x double> %a2, ptr %a2.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
   ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4
   ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5
   ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0
@@ -65,11 +65,11 @@ entry:
   %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
   store <2 x half> %a3, ptr %a3.i, align 2
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3)
   ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
-  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 4)
   ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
   ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0
   ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1
@@ -80,8 +80,8 @@ entry:
   %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
   store <3 x i64> %a4, ptr %a4.i, align 8
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
   ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
@@ -96,8 +96,8 @@ entry:
   %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
   store <4 x i32> %a5, ptr %a5.i, align 4
 
-  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
-  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
   ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
   ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1
   ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2

>From 29843c0ab437ae28b396262b65f568f2232cee96 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 11 Jun 2025 11:09:21 -0400
Subject: [PATCH 7/8] wip: Explicit structs in clang codegen

This abandons the `dx.Layout` idea and just uses explicit padding.

Note: Reordered fields break stuff, including ones from implicit bindings.
---
 clang/lib/CodeGen/CGExpr.cpp                  |  20 ++
 clang/lib/CodeGen/CGExprAgg.cpp               | 127 ++++++++
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  24 +-
 clang/lib/CodeGen/CGHLSLRuntime.h             |   7 +-
 clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp | 270 +++++-------------
 clang/lib/CodeGen/HLSLBufferLayoutBuilder.h   |  47 +--
 clang/lib/CodeGen/Targets/DirectX.cpp         |   7 +-
 clang/lib/CodeGen/Targets/SPIR.cpp            |   7 +-
 clang/test/CodeGenHLSL/ArrayAssignable.hlsl   |  70 +++--
 .../GlobalConstructorFunction.hlsl            |   6 +-
 clang/test/CodeGenHLSL/resources/cbuffer.hlsl | 251 ++++++++++------
 .../resources/cbuffer_and_namespaces.hlsl     |  10 +-
 .../resources/cbuffer_with_packoffset.hlsl    |  13 +-
 ...uffer_with_static_global_and_function.hlsl |   2 +-
 .../resources/default_cbuffer.hlsl            |  11 +-
 .../default_cbuffer_with_layout.hlsl          |   3 +
 llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp  |   7 +-
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp |   3 -
 .../CodeGen/SPIRV/hlsl-resources/cbuffer.ll   |   8 +-
 .../SPIRV/hlsl-resources/cbuffer_unused.ll    |  18 +-
 20 files changed, 528 insertions(+), 383 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947882544..6c6a84709c32e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4678,6 +4678,26 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
         emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
                               ScaledIdx, false, SignedIndices, E->getExprLoc());
     Addr = Address(EltPtr, OrigBaseElemTy, EltAlign);
+  } else if (E->getType().getAddressSpace() == LangAS::hlsl_constant) {
+    // This is an array inside of a cbuffer.
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
+    auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+
+    // ...
+    CharUnits RowAlignedSize = getContext()
+                                   .getTypeSizeInChars(E->getType())
+                                   .alignTo(CharUnits::fromQuantity(16));
+
+    llvm::Value *RowAlignedSizeVal =
+        llvm::ConstantInt::get(Idx->getType(), RowAlignedSize.getQuantity());
+    llvm::Value *ScaledIdx = Builder.CreateMul(Idx, RowAlignedSizeVal);
+
+    CharUnits EltAlign =
+      getArrayElementAlign(Addr.getAlignment(), Idx, RowAlignedSize);
+    llvm::Value *EltPtr =
+        emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
+                              ScaledIdx, false, SignedIndices, E->getExprLoc());
+    Addr = Address(EltPtr, Addr.getElementType(), EltAlign);
   } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
     // If this is A[i] where A is an array, the frontend will have decayed the
     // base to be a ArrayToPointerDecay implicit cast.  While correct, it is
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index b8150a24d45fc..b696687cd3877 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -19,12 +19,14 @@
 #include "CodeGenModule.h"
 #include "ConstantEmitter.h"
 #include "EHScopeStack.h"
+#include "HLSLBufferLayoutBuilder.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/StmtVisitor.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -2280,6 +2282,127 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
   return AggValueSlot::MayOverlap;
 }
 
+namespace {
+class HLSLBufferCopyEmitter {
+  CodeGenFunction &CGF;
+  Address DestPtr;
+  Address SrcPtr;
+  llvm::Type *LayoutTy = nullptr;
+
+  SmallVector<llvm::Value *> CurStoreIndices;
+  SmallVector<llvm::Value *> CurLoadIndices;
+
+  void emitCopyAtIndices(llvm::Type *FieldTy, unsigned StoreIndex,
+                         unsigned LoadIndex) {
+    CurStoreIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, StoreIndex));
+    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, LoadIndex));
+    auto RestoreIndices = llvm::make_scope_exit([&]() {
+      CurStoreIndices.pop_back();
+      CurLoadIndices.pop_back();
+    });
+
+    if (processArray(FieldTy))
+      return;
+    if (processBufferLayoutArray(FieldTy))
+      return;
+    if (processStruct(FieldTy))
+      return;
+
+    // We have a scalar or vector element - emit a copy.
+    CharUnits Align = CharUnits::fromQuantity(
+        CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
+    Address SrcGEP = RawAddress(
+        CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
+                                      CurLoadIndices, "cbuf.src"),
+        FieldTy, Align, SrcPtr.isKnownNonNull());
+    Address DestGEP = CGF.Builder.CreateInBoundsGEP(
+        DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
+    llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
+    CGF.Builder.CreateStore(Load, DestGEP);
+  }
+
+  bool processArray(llvm::Type *FieldTy) {
+    auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
+    if (!AT)
+      return false;
+
+    // If we have an array then there isn't any padding
+    // between elements. We just need to copy each element over.
+    for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
+      emitCopyAtIndices(AT->getElementType(), I, I);
+    return true;
+  }
+
+  bool processBufferLayoutArray(llvm::Type *FieldTy) {
+    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+    if (!ST || ST->getNumElements() != 2)
+      return false;
+
+    auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
+    if (!PaddedEltsTy)
+      return false;
+
+    auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
+    if (!PaddedTy || PaddedTy->getNumElements() != 2)
+      return false;
+
+    if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
+            PaddedTy->getElementType(1)))
+      return false;
+
+    llvm::Type *ElementTy = ST->getElementType(1);
+    if (PaddedTy->getElementType(0) != ElementTy)
+      return false;
+
+    // All but the last of the logical array elements are in the padded array.
+    unsigned NumElts = PaddedEltsTy->getNumElements() + 1;
+
+    // Add an extra indirection to the load for the struct and walk the
+    // array prefix.
+    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, 0));
+    for (unsigned I = 0; I < NumElts - 1; ++I)
+      emitCopyAtIndices(ElementTy, I, I);
+    CurLoadIndices.pop_back();
+
+    // Now copy the last element.
+    emitCopyAtIndices(ElementTy, NumElts - 1, 1);
+
+    return true;
+  }
+
+  bool processStruct(llvm::Type *FieldTy) {
+    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+    if (!ST)
+      return false;
+
+    unsigned Skipped = 0;
+    for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
+      llvm::Type *ElementTy = ST->getElementType(I);
+      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy))
+        ++Skipped;
+      else
+        emitCopyAtIndices(ElementTy, I, I + Skipped);
+    }
+    return true;
+  }
+
+public:
+  HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
+      : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}
+
+  bool emitCopy(QualType CType) {
+    LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
+
+    // If we don't have an aggregate, we can just fall back to normal memcpy.
+    if (!LayoutTy->isAggregateType())
+      return false;
+
+    emitCopyAtIndices(LayoutTy, 0, 0);
+    return true;
+  }
+};
+} // namespace
+
 void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
                                         AggValueSlot::Overlap_t MayOverlap,
                                         bool isVolatile) {
@@ -2315,6 +2438,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
     }
   }
 
+  if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant)
+    if (HLSLBufferCopyEmitter(*this, DestPtr, SrcPtr).emitCopy(Ty))
+      return;
+
   // Aggregate assignment turns into llvm.memcpy.  This is almost valid per
   // C99 6.5.16.1p3, which states "If the value being stored in an object is
   // read from another object that overlaps in anyway the storage of the first
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index cf018c8c7de2a..214151b8f5c3a 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -266,9 +266,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
   assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");
 
   // Check if the target has a specific translation for this type first.
-  if (llvm::Type *TargetTy =
+  if (llvm::Type *LayoutTy =
           CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
-    return TargetTy;
+    return LayoutTy;
 
   llvm_unreachable("Generic handling of HLSL types is not supported.");
 }
@@ -285,10 +285,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
 
   // get the layout struct from constant buffer target type
   llvm::Type *BufType = BufGV->getValueType();
-  llvm::Type *BufLayoutType =
-      cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
   llvm::StructType *LayoutStruct = cast<llvm::StructType>(
-      cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
+      cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));
 
   // Start metadata list associating the buffer global variable with its
   // constatns
@@ -327,6 +325,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
       continue;
     }
 
+    if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
+      ++ElemIt;
+
     assert(ElemIt != LayoutStruct->element_end() &&
            "number of elements in layout struct does not match");
     llvm::Type *LayoutType = *ElemIt++;
@@ -424,12 +425,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
   if (BufDecl->hasValidPackoffset())
     fillPackoffsetLayout(BufDecl, Layout);
 
-  llvm::TargetExtType *TargetTy =
-      cast<llvm::TargetExtType>(convertHLSLSpecificType(
-          ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
+  llvm::Type *LayoutTy = convertHLSLSpecificType(
+      ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr);
   llvm::GlobalVariable *BufGV = new GlobalVariable(
-      TargetTy, /*isConstant*/ false,
-      GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
+      LayoutTy, /*isConstant*/ false,
+      GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
       llvm::formatv("{0}{1}", BufDecl->getName(),
                     BufDecl->isCBuffer() ? ".cb" : ".tb"),
       GlobalValue::NotThreadLocal);
@@ -462,7 +462,7 @@ void CGHLSLRuntime::addRootSignature(
                      SignatureDecl->getRootElements(), nullptr, M);
 }
 
-llvm::TargetExtType *
+llvm::StructType *
 CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
   const auto Entry = LayoutTypes.find(StructType);
   if (Entry != LayoutTypes.end())
@@ -471,7 +471,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
 }
 
 void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
-                                            llvm::TargetExtType *LayoutTy) {
+                                            llvm::StructType *LayoutTy) {
   assert(getHLSLBufferLayoutType(StructType) == nullptr &&
          "layout type for this struct already exist");
   LayoutTypes[StructType] = LayoutTy;
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 9c0e6056fd4ee..c79b7cded3a19 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -186,10 +186,9 @@ class CGHLSLRuntime {
 
   llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);
 
-  llvm::TargetExtType *
-  getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
+  llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
   void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
-                               llvm::TargetExtType *LayoutTy);
+                               llvm::StructType *LayoutTy);
   void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);
 
   std::optional<LValue>
@@ -207,7 +206,7 @@ class CGHLSLRuntime {
                                    HLSLResourceBindingAttr *RBA);
   llvm::Triple::ArchType getArch();
 
-  llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
+  llvm::DenseMap<const clang::RecordType *, llvm::StructType *> LayoutTypes;
 };
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
index 838903cdcd1ee..99b5602b18bff 100644
--- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
@@ -9,6 +9,7 @@
 #include "HLSLBufferLayoutBuilder.h"
 #include "CGHLSLRuntime.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/Type.h"
 #include <climits>
 
@@ -19,71 +20,21 @@
 
 using namespace clang;
 using namespace clang::CodeGen;
-using llvm::hlsl::CBufferRowSizeInBytes;
 
-namespace {
-
-// Creates a new array type with the same dimentions but with the new
-// element type.
-static llvm::Type *
-createArrayWithNewElementType(CodeGenModule &CGM,
-                              const ConstantArrayType *ArrayType,
-                              llvm::Type *NewElemType) {
-  const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual();
-  if (ArrayElemType->isConstantArrayType())
-    NewElemType = createArrayWithNewElementType(
-        CGM, cast<const ConstantArrayType>(ArrayElemType), NewElemType);
-  return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize());
-}
-
-// Returns the size of a scalar or vector in bytes
-static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) {
-  assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy());
-  if (Ty->isVectorTy()) {
-    llvm::FixedVectorType *FVT = cast<llvm::FixedVectorType>(Ty);
-    return FVT->getNumElements() *
-           (FVT->getElementType()->getScalarSizeInBits() / 8);
-  }
-  return Ty->getScalarSizeInBits() / 8;
-}
-
-} // namespace
+static const CharUnits CBufferRowSize =
+    CharUnits::fromQuantity(llvm::hlsl::CBufferRowSizeInBytes);
 
 namespace clang {
 namespace CodeGen {
 
-// Creates a layout type for given struct or class with HLSL constant buffer
-// layout taking into account PackOffsets, if provided.
-// Previously created layout types are cached by CGHLSLRuntime.
-//
-// The function iterates over all fields of the record type (including base
-// classes) and calls layoutField to converts each field to its corresponding
-// LLVM type and to calculate its HLSL constant buffer layout. Any embedded
-// structs (or arrays of structs) are converted to target layout types as well.
-//
-// When PackOffsets are specified the elements will be placed based on the
-// user-specified offsets. Not all elements must have a packoffset/register(c#)
-// annotation though. For those that don't, the PackOffsets array will contain
-// -1 value instead. These elements must be placed at the end of the layout
-// after all of the elements with specific offset.
-llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
+llvm::StructType *HLSLBufferLayoutBuilder::layOutStruct(
     const RecordType *RT, const llvm::SmallVector<int32_t> *PackOffsets) {
 
   // check if we already have the layout type for this struct
-  if (llvm::TargetExtType *Ty =
-          CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT))
+  // TODO: Do we need to check for matching PackOffsets?
+  if (llvm::StructType *Ty = CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT))
     return Ty;
 
-  SmallVector<unsigned> Layout;
-  SmallVector<llvm::Type *> LayoutElements;
-  unsigned Index = 0; // packoffset index
-  unsigned EndOffset = 0;
-
-  SmallVector<std::pair<const FieldDecl *, unsigned>> DelayLayoutFields;
-
-  // reserve first spot in the layout vector for buffer size
-  Layout.push_back(0);
-
   // iterate over all fields of the record, including fields on base classes
   llvm::SmallVector<CXXRecordDecl *> RecordDecls;
   RecordDecls.push_back(RT->castAsCXXRecordDecl());
@@ -94,179 +45,102 @@ llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
     RecordDecls.push_back(D->bases_begin()->getType()->castAsCXXRecordDecl());
   }
 
-  unsigned FieldOffset;
-  llvm::Type *FieldType;
+  SmallVector<llvm::Type *> Layout;
+  SmallVector<const FieldDecl *> DelayLayoutFields;
+  CharUnits CurrentOffset = CharUnits::Zero();
+  auto LayOutField = [&](QualType FieldType) {
+    llvm::Type *LayoutType = layOutType(FieldType);
+
+    const llvm::DataLayout &DL = CGM.getDataLayout();
+    CharUnits Size =
+        CharUnits::fromQuantity(DL.getTypeSizeInBits(LayoutType) / 8);
+    CharUnits Align = CharUnits::fromQuantity(DL.getABITypeAlign(LayoutType));
+
+    if (LayoutType->isAggregateType() ||
+        (CurrentOffset % CBufferRowSize) + Size > CBufferRowSize)
+      Align = Align.alignTo(CBufferRowSize);
+
+    CharUnits NextOffset = CurrentOffset.alignTo(Align);
+    if (NextOffset > CurrentOffset) {
+      llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding(
+          CGM, NextOffset - CurrentOffset);
+      Layout.emplace_back(Padding);
+      CurrentOffset = NextOffset;
+    }
+    Layout.emplace_back(LayoutType);
+    CurrentOffset += Size;
+  };
 
+  unsigned PackOffsetIndex = 0;
   while (!RecordDecls.empty()) {
     const CXXRecordDecl *RD = RecordDecls.pop_back_val();
 
     for (const auto *FD : RD->fields()) {
-      assert((!PackOffsets || Index < PackOffsets->size()) &&
+      assert((!PackOffsets || PackOffsetIndex < PackOffsets->size()) &&
              "number of elements in layout struct does not match number of "
              "packoffset annotations");
 
       // No PackOffset info at all, or have a valid packoffset/register(c#)
       // annotations value -> layout the field.
-      const int PO = PackOffsets ? (*PackOffsets)[Index++] : -1;
-      if (!PackOffsets || PO != -1) {
-        if (!layoutField(FD, EndOffset, FieldOffset, FieldType, PO))
-          return nullptr;
-        Layout.push_back(FieldOffset);
-        LayoutElements.push_back(FieldType);
+      const int PO = PackOffsets ? (*PackOffsets)[PackOffsetIndex++] : -1;
+      if (PO != -1) {
+        LayOutField(FD->getType());
         continue;
       }
       // Have PackOffset info, but there is no packoffset/register(cX)
       // annotation on this field. Delay the layout until after all of the
       // other elements with packoffsets/register(cX) are processed.
-      DelayLayoutFields.emplace_back(FD, LayoutElements.size());
-      // reserve space for this field in the layout vector and elements list
-      Layout.push_back(UINT_MAX);
-      LayoutElements.push_back(nullptr);
+      DelayLayoutFields.emplace_back(FD);
     }
   }
 
   // process delayed layouts
-  for (auto I : DelayLayoutFields) {
-    const FieldDecl *FD = I.first;
-    const unsigned IndexInLayoutElements = I.second;
-    // the first item in layout vector is size, so we need to offset the index
-    // by 1
-    const unsigned IndexInLayout = IndexInLayoutElements + 1;
-    assert(Layout[IndexInLayout] == UINT_MAX &&
-           LayoutElements[IndexInLayoutElements] == nullptr);
-
-    if (!layoutField(FD, EndOffset, FieldOffset, FieldType))
-      return nullptr;
-    Layout[IndexInLayout] = FieldOffset;
-    LayoutElements[IndexInLayoutElements] = FieldType;
-  }
+  for (const FieldDecl *FD : DelayLayoutFields)
+    LayOutField(FD->getType());
 
-  // set the size of the buffer
-  Layout[0] = EndOffset;
-
-  // create the layout struct type; anonymous struct have empty name but
+  // Create the layout struct type; anonymous structs have empty name but
   // non-empty qualified name
   const auto *Decl = RT->castAsCXXRecordDecl();
   std::string Name =
       Decl->getName().empty() ? "anon" : Decl->getQualifiedNameAsString();
-  llvm::StructType *StructTy =
-      llvm::StructType::create(LayoutElements, Name, true);
 
-  // create target layout type
-  llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get(
-      CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout);
-  if (NewLayoutTy)
-    CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewLayoutTy);
-  return NewLayoutTy;
+  llvm::StructType *NewTy = llvm::StructType::create(Layout, Name,
+                                                     /*isPacked=*/true);
+  CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewTy);
+  return NewTy;
 }
 
-// The function converts a single field of HLSL Buffer to its corresponding
-// LLVM type and calculates it's layout. Any embedded structs (or
-// arrays of structs) are converted to target layout types as well.
-// The converted type is set to the FieldType parameter, the element
-// offset is set to the FieldOffset parameter. The EndOffset (=size of the
-// buffer) is also updated accordingly to the offset just after the placed
-// element, unless the incoming EndOffset already larger (may happen in case
-// of unsorted packoffset annotations).
-// Returns true if the conversion was successful.
-// The packoffset parameter contains the field's layout offset provided by the
-// user or -1 if there was no packoffset (or register(cX)) annotation.
-bool HLSLBufferLayoutBuilder::layoutField(const FieldDecl *FD,
-                                          unsigned &EndOffset,
-                                          unsigned &FieldOffset,
-                                          llvm::Type *&FieldType,
-                                          int Packoffset) {
-
-  // Size of element; for arrays this is a size of a single element in the
-  // array. Total array size of calculated as (ArrayCount-1) * ArrayStride +
-  // ElemSize.
-  unsigned ElemSize = 0;
-  unsigned ElemOffset = 0;
-  unsigned ArrayCount = 1;
-  unsigned ArrayStride = 0;
-
-  unsigned NextRowOffset = llvm::alignTo(EndOffset, CBufferRowSizeInBytes);
-
-  llvm::Type *ElemLayoutTy = nullptr;
-  QualType FieldTy = FD->getType();
-
-  if (FieldTy->isConstantArrayType()) {
-    // Unwrap array to find the element type and get combined array size.
-    QualType Ty = FieldTy;
-    while (Ty->isConstantArrayType()) {
-      auto *ArrayTy = CGM.getContext().getAsConstantArrayType(Ty);
-      ArrayCount *= ArrayTy->getSExtSize();
-      Ty = ArrayTy->getElementType();
-    }
-    // For array of structures, create a new array with a layout type
-    // instead of the structure type.
-    if (Ty->isStructureOrClassType()) {
-      llvm::Type *NewTy = cast<llvm::TargetExtType>(
-          createLayoutType(Ty->getAsCanonical<RecordType>()));
-      if (!NewTy)
-        return false;
-      assert(isa<llvm::TargetExtType>(NewTy) && "expected target type");
-      ElemSize = cast<llvm::TargetExtType>(NewTy)->getIntParameter(0);
-      ElemLayoutTy = createArrayWithNewElementType(
-          CGM, cast<ConstantArrayType>(FieldTy.getTypePtr()), NewTy);
-    } else {
-      // Array of vectors or scalars
-      ElemSize =
-          getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty));
-      ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
-    }
-    ArrayStride = llvm::alignTo(ElemSize, CBufferRowSizeInBytes);
-    ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
-
-  } else if (FieldTy->isStructureOrClassType()) {
-    // Create a layout type for the structure
-    ElemLayoutTy = createLayoutType(
-        cast<RecordType>(FieldTy->getAsCanonical<RecordType>()));
-    if (!ElemLayoutTy)
-      return false;
-    assert(isa<llvm::TargetExtType>(ElemLayoutTy) && "expected target type");
-    ElemSize = cast<llvm::TargetExtType>(ElemLayoutTy)->getIntParameter(0);
-    ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
-
-  } else {
-    // scalar or vector - find element size and alignment
-    unsigned Align = 0;
-    ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
-    if (ElemLayoutTy->isVectorTy()) {
-      // align vectors by sub element size
-      const llvm::FixedVectorType *FVT =
-          cast<llvm::FixedVectorType>(ElemLayoutTy);
-      unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8;
-      ElemSize = FVT->getNumElements() * SubElemSize;
-      Align = SubElemSize;
-    } else {
-      assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy());
-      ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8;
-      Align = ElemSize;
-    }
+llvm::Type *HLSLBufferLayoutBuilder::layOutArray(const ConstantArrayType *AT) {
+  llvm::Type *EltTy = layOutType(AT->getElementType());
+  uint64_t Count = AT->getZExtSize();
+
+  CharUnits EltSize =
+      CharUnits::fromQuantity(CGM.getDataLayout().getTypeSizeInBits(EltTy) / 8);
+  CharUnits Padding = EltSize.alignTo(CBufferRowSize) - EltSize;
+
+  // If we don't have any padding between elements then we just need the array
+  // itself.
+  if (Count < 2 || Padding.isZero())
+    return llvm::ArrayType::get(EltTy, Count);
+
+  llvm::LLVMContext &Context = CGM.getLLVMContext();
+  llvm::Type *PaddingTy =
+      CGM.getTargetCodeGenInfo().getHLSLPadding(CGM, Padding);
+  auto *PaddedEltTy =
+      llvm::StructType::get(Context, {EltTy, PaddingTy}, /*isPacked=*/true);
+  return llvm::StructType::get(
+      Context, {llvm::ArrayType::get(PaddedEltTy, Count - 1), EltTy},
+      /*IsPacked=*/true);
+}
 
-    // calculate or get element offset for the vector or scalar
-    if (Packoffset != -1) {
-      ElemOffset = Packoffset;
-    } else {
-      ElemOffset = llvm::alignTo(EndOffset, Align);
-      // if the element does not fit, move it to the next row
-      if (ElemOffset + ElemSize > NextRowOffset)
-        ElemOffset = NextRowOffset;
-    }
-  }
+llvm::Type *HLSLBufferLayoutBuilder::layOutType(QualType Ty) {
+  if (const auto *AT = CGM.getContext().getAsConstantArrayType(Ty))
+    return layOutArray(AT);
 
-  // Update end offset of the layout; do not update it if the EndOffset
-  // is already bigger than the new value (which may happen with unordered
-  // packoffset annotations)
-  unsigned NewEndOffset =
-      ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize;
-  EndOffset = std::max<unsigned>(EndOffset, NewEndOffset);
+  if (Ty->isStructureOrClassType())
+    return layOutStruct(Ty->getAsCanonical<RecordType>());
 
-  // add the layout element and offset to the lists
-  FieldOffset = ElemOffset;
-  FieldType = ElemLayoutTy;
-  return true;
+  return CGM.getTypes().ConvertTypeForMem(Ty);
 }
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
index 61240b280cfcb..0515b469f8b03 100644
--- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
@@ -6,13 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/TypeBase.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DerivedTypes.h"
 
 namespace clang {
-class RecordType;
-class FieldDecl;
-
 namespace CodeGen {
 class CodeGenModule;
 
@@ -24,23 +22,36 @@ class CodeGenModule;
 class HLSLBufferLayoutBuilder {
 private:
   CodeGenModule &CGM;
-  llvm::StringRef LayoutTypeName;
 
 public:
-  HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName)
-      : CGM(CGM), LayoutTypeName(LayoutTypeName) {}
-
-  // Returns LLVM target extension type with the name LayoutTypeName
-  // for given structure type and layout data. The first number in
-  // the Layout is the size followed by offsets for each struct element.
-  llvm::TargetExtType *
-  createLayoutType(const RecordType *StructType,
-                   const llvm::SmallVector<int32_t> *Packoffsets = nullptr);
-
-private:
-  bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset,
-                   unsigned &FieldOffset, llvm::Type *&FieldType,
-                   int Packoffset = -1);
+  HLSLBufferLayoutBuilder(CodeGenModule &CGM) : CGM(CGM) {}
+
+  /// Lays out a struct type following HLSL buffer rules and considering
+  /// PackOffsets, if provided. Previously created layout structs are cached by
+  /// CGHLSLRuntime.
+  ///
+  /// The function iterates over all fields of the record type (including base
+  /// classes) and calls layoutField to converts each field to its corresponding
+  /// LLVM type and to calculate its HLSL constant buffer layout. Any embedded
+  /// structs (or arrays of structs) are converted to layout types as well.
+  ///
+  /// When PackOffsets are specified the elements will be placed based on the
+  /// user-specified offsets. Not all elements must have a
+  /// packoffset/register(c#) annotation though. For those that don't, the
+  /// PackOffsets array will contain -1 value instead. These elements must be
+  /// placed at the end of the layout after all of the elements with specific
+  /// offset.
+  llvm::StructType *
+  layOutStruct(const RecordType *StructType,
+               const llvm::SmallVector<int32_t> *Packoffsets = nullptr);
+
+  /// Lays out an array type following HLSL buffer rules.
+  llvm::Type *
+  layOutArray(const ConstantArrayType *AT);
+
+  /// Lays out a type following HLSL buffer rules. Arrays and structures will be
+  /// padded appropriately and nested objects will be converted as appropriate.
+  llvm::Type *layOutType(QualType Type);
 };
 
 } // namespace CodeGen
diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp
index d44e531dacdff..6f6eb8beed5b9 100644
--- a/clang/lib/CodeGen/Targets/DirectX.cpp
+++ b/clang/lib/CodeGen/Targets/DirectX.cpp
@@ -88,10 +88,9 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
     if (ContainedTy.isNull() || !ContainedTy->isStructureType())
       return nullptr;
 
-    llvm::Type *BufferLayoutTy =
-        HLSLBufferLayoutBuilder(CGM, "dx.Layout")
-            .createLayoutType(ContainedTy->castAsCanonical<RecordType>(),
-                              Packoffsets);
+    llvm::StructType *BufferLayoutTy =
+        HLSLBufferLayoutBuilder(CGM).layOutStruct(
+            ContainedTy->getAsCanonical<RecordType>(), Packoffsets);
     if (!BufferLayoutTy)
       return nullptr;
 
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index a945f203c15e5..9db3daebd3786 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -513,10 +513,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(
     if (ContainedTy.isNull() || !ContainedTy->isStructureType())
       return nullptr;
 
-    llvm::Type *BufferLayoutTy =
-        HLSLBufferLayoutBuilder(CGM, "spirv.Layout")
-            .createLayoutType(ContainedTy->castAsCanonical<RecordType>(),
-                              Packoffsets);
+    llvm::StructType *BufferLayoutTy =
+        HLSLBufferLayoutBuilder(CGM).layOutStruct(
+            ContainedTy->getAsCanonical<RecordType>(), Packoffsets);
     uint32_t StorageClass = /* Uniform storage class */ 2;
     return llvm::TargetExtType::get(Ctx, "spirv.VulkanBuffer", {BufferLayoutTy},
                                     {StorageClass, false});
diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
index aaa486eff10b7..0a71801a860d2 100644
--- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
+++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
@@ -5,18 +5,19 @@ struct S {
   float f;
 };
 
-// CHECK: [[CBLayout:%.*]] = type <{ [2 x float], [2 x <4 x i32>], [2 x [2 x i32]], [1 x target("dx.Layout", %S, 8, 0, 4)] }>
-// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", [[CBLayout]], 136, 0, 32, 64, 128))
-// CHECK: @c1 = external hidden addrspace(2) global [2 x float], align 4
+// CHECK: [[CBLayout:%.*]] = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), [2 x <4 x i32>], <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, target("dx.Padding", 12), <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }> }>
+
+// CHECK: @CBArrays.cb = global target("dx.CBuffer", [[CBLayout]])
+// CHECK: @c1 = external hidden addrspace(2) global <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
 // CHECK: @c2 = external hidden addrspace(2) global [2 x <4 x i32>], align 16
-// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x i32]], align 4
-// CHECK: @c4 = external hidden addrspace(2) global [1 x target("dx.Layout", %S, 8, 0, 4)], align 1
+// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, align 4
+// CHECK: @c4 = external hidden addrspace(2) global <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, align 1
 
 cbuffer CBArrays : register(b0) {
   float c1[2];
   int4 c2[2];
   int c3[2][2];
-  S c4[1];
+  S c4[2];
 }
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign1
@@ -140,40 +141,71 @@ void arr_assign7() {
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign8
 // CHECK: [[C:%.*]] = alloca [2 x float], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c1, i32 8, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0
+// CHECK-NEXT: [[L0:%.*]] = load float, ptr addrspace(2) @c1, align 4
+// CHECK-NEXT: store float [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, ptr addrspace(2) @c1, i32 0, i32 1), align 4
+// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
 // CHECK-NEXT: ret void
 void arr_assign8() {
-  float C[2] = {1.0, 2.0};
+  float C[2];
   C = c1;
 }
 
+// TODO: A memcpy would actually be valid here, since everything is aligned on
+// 16 byte boundaries.
+//
 // CHECK-LABEL: define hidden void {{.*}}arr_assign9
 // CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[C]], ptr align 16 {{.*}}, i32 32, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0
+// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16
+// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16
+// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16
 // CHECK-NEXT: ret void
 void arr_assign9() {
-  int4 C[2] = {1,2,3,4,5,6,7,8};
+  int4 C[2];
   C = c2;
 }
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign10
 // CHECK: [[C:%.*]] = alloca [2 x [2 x i32]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 16, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c3, i32 16, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c3, align 4
+// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 0, i32 0, i32 1), align 4
+// CHECK-NEXT: store i32 [[L1]], ptr [[V1]], align 4
+// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 0, i32 0), align 4
+// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 1
+// CHECK-NEXT: [[L3:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 1), align 4
+// CHECK-NEXT: store i32 [[L3]], ptr [[V3]], align 4
 // CHECK-NEXT: ret void
 void arr_assign10() {
-  int C[2][2] = {1,2,3,4};
+  int C[2][2];
   C = c3;
 }
 
 // CHECK-LABEL: define hidden void {{.*}}arr_assign11
-// CHECK: [[C:%.*]] = alloca [1 x %struct.S], align 1
-// CHECK: call void @llvm.memcpy.p0.p2.i32(ptr align 1 [[C]], ptr addrspace(2) align 1 @c4, i32 8, i1 false)
+// CHECK: [[C:%.*]] = alloca [2 x %struct.S], align 1
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c4, align 4
+// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 0, i32 0, i32 1), align 4
+// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
+// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 0), align 4
+// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 1
+// CHECK-NEXT: [[L3:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 1), align 4
+// CHECK-NEXT: store float [[L3]], ptr [[V3]], align 4
 // CHECK-NEXT: ret void
 void arr_assign11() {
-  S s = {1, 2.0};
-  S C[1] = {s};
+  S C[2];
   C = c4;
 }
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index b36682e065b3a..df82e8201ba55 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -37,9 +37,9 @@ void main(unsigned GI : SV_GroupIndex) {}
 // INLINE-NEXT:   alloca
 // INLINE-NEXT:   store i32 12
 // INLINE-NEXT:   store i32 13
-// INLINE-NEXT:   %[[HANDLE:.*]] = call target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0))
-// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_$Globalss_4_0tt"(i32 0, i32 0, i32 1, i32 0, i1 false)
-// INLINE-NEXT:   store target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) %[[HANDLE]], ptr @"$Globals.cb", align 4
+// INLINE-NEXT:   %[[HANDLE:.*]] = call target("dx.CBuffer", %"__cblayout_$Globals")
+// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_$Globalsst"(i32 0, i32 0, i32 1, i32 0, i1 false, ptr @"$Globals.str")
+// INLINE-NEXT:   store target("dx.CBuffer", %"__cblayout_$Globals") %[[HANDLE]], ptr @"$Globals.cb", align 4
 // INLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
 // INLINE-NEXT:   store i32 %
 // INLINE-NEXT:   store i32 0
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
index 8dcff5dad9d13..ab37ae0bc4e68 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
@@ -1,37 +1,121 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 
-// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }>
-// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }>
-// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
-// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8),
-// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)],
-// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }>
+// CHECK: %__cblayout_CBScalars = type <{
+// CHECK-SAME:   float, target("dx.Padding", 4), double,
+// CHECK-SAME:   half, target("dx.Padding", 6), i64,
+// CHECK-SAME:   i32, i16, target("dx.Padding", 2), i32, target("dx.Padding", 4),
+// CHECK-SAME:   i64
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBVectors = type <{
+// CHECK-SAME:   <3 x float>, target("dx.Padding", 4),
+// CHECK-SAME:   <3 x double>, <2 x half>, target("dx.Padding", 4),
+// CHECK-SAME:   <3 x i64>, target("dx.Padding", 8),
+// CHECK-SAME:   <4 x i32>,
+// CHECK-SAME:   <3 x i16>, target("dx.Padding", 10),
+// CHECK-SAME:   <3 x i64>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBArrays = type <{
+// CHECK-SAME: <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+// CHECK-SAME: <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [1 x <{
+// CHECK-SAME:   <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>],
+// CHECK-SAME:   <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>
+// CHECK-SAME: }>, target("dx.Padding", 14),
+// CHECK-SAME: <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+// CHECK-SAME: [2 x [3 x [4 x <4 x i32>]]]
+// CHECK-SAME: [1 x i16], target("dx.Padding", 14),
+// CHECK-SAME: <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBStructs = type <{
+// CHECK-SAME:   %A, target("dx.Padding", 8),
+
+// TODO: We should have target("dx.Padding", 2) padding after %B, but we don't correctly handle
+// 2- and 3-element vectors inside structs yet because of DataLayout rules.
+// CHECK-SAME: %B,
+
+// CHECK-SAME:   %C, target("dx.Padding", 8),
+// CHECK-SAME:   <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, target("dx.Padding", 8),
+// CHECK-SAME:   %__cblayout_D, half,
+// CHECK-SAME:   <3 x i16>
+// CHECK-SAME: }>
 
 // CHECK: %A = type <{ <2 x float> }>
 // CHECK: %B = type <{ <2 x float>, <3 x i16> }>
-// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }>
-// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }>
+// CHECK: %C = type <{ i32, target("dx.Padding", 12), %A }>
+
+// CHECK: %__cblayout_D = type <{
+// CHECK-SAME:   <{ [1 x <{
+// CHECK-SAME:     <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }>, target("dx.Padding", 2)
+// CHECK-SAME:   }>],
+// CHECK-SAME:   <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }> }>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBClasses = type <{
+// CHECK-SAME:   %K, target("dx.Padding", 12),
+// CHECK-SAME:   %L, target("dx.Padding", 8),
+// CHECK-SAME:   %M, target("dx.Padding", 12),
+// CHECK-SAME:   <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>
+// CHECK-SAME: }>
 
-// CHECK: %__cblayout_CBClasses = type <{ target("dx.Layout", %K, 4, 0), target("dx.Layout", %L, 8, 0, 4),
-// CHECK-SAME: target("dx.Layout", %M, 68, 0), [10 x target("dx.Layout", %K, 4, 0)] }>
 // CHECK: %K = type <{ float }>
 // CHECK: %L = type <{ float, float }>
-// CHECK: %M = type <{ [5 x target("dx.Layout", %K, 4, 0)] }>
-
-// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float,
-// CHECK-SAME: target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }>
+// CHECK: %M = type <{ <{ [4 x <{ %K, target("dx.Padding", 12) }>], %K }> }>
+
+// CHECK: %__cblayout_CBMix = type <{
+// CHECK-SAME:   <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, float, target("dx.Padding", 4)
+// CHECK-SAME:   <{ [2 x <{
+// CHECK-SAME:     <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>],
+// CHECK-SAME:     <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>
+// CHECK-SAME:   }>, float, target("dx.Padding", 4),
+// CHECK-SAME:   %anon, target("dx.Padding", 4), double,
+// CHECK-SAME:   %anon.0, float, target("dx.Padding", 4),
+// CHECK-SAME:   <1 x double>, i16
+// CHECK-SAME: }>
 
 // CHECK: %Test = type <{ float, float }>
 // CHECK: %anon = type <{ float }>
 // CHECK: %anon.0 = type <{ <2 x i32> }>
 
-// CHECK: %__cblayout_CB_A = type <{ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }>
-// CHECK: %__cblayout_CB_B = type <{ [3 x <3 x double>], <3 x half> }>
-// CHECK: %__cblayout_CB_C = type <{ i32, target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90), half, target("dx.Layout", %G, 258, 0, 48, 64, 256), double }>
-
-// CHECK: %F = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }>
-// CHECK: %G = type <{ target("dx.Layout", %E, 36, 0, 8, 16, 20, 22, 24, 32), [1 x float], [2 x target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)], half }>
-// CHECK: %E = type <{ float, double, float, half, i16, i64, i32 }>
+// CHECK: %__cblayout_CB_A = type <{
+// CHECK-SAME:   <{ [1 x <{ double, target("dx.Padding", 8) }>], double }>, target("dx.Padding", 8),
+// CHECK-SAME:   <{ [2 x <{ <3 x float>, target("dx.Padding", 4) }>], <3 x float> }>, float,
+// CHECK-SAME:   <{ [2 x <{ double, target("dx.Padding", 8) }>], double }>, half, target("dx.Padding", 6),
+// CHECK-SAME:   [1 x <2 x double>],
+// CHECK-SAME:   float, target("dx.Padding", 12),
+// CHECK-SAME:   <{ [1 x <{ <3 x half>, target("dx.Padding", 10) }>], <3 x half> }>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CB_B = type <{
+// CHECK-SAME: <{ [2 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CB_C = type <{
+// CHECK-SAME:   i32, target("dx.Padding", 12),
+// CHECK-SAME:   %F,
+// CHECK-SAME:   half, target("dx.Padding", 14),
+// CHECK-SAME:   %G, target("dx.Padding", 6), double
+// CHECK-SAME: }>
+
+// CHECK: %F = type <{
+// CHECK-SAME:   double, target("dx.Padding", 8),
+// CHECK-SAME:   <3 x float>, float,
+// CHECK-SAME:   <3 x double>, half, target("dx.Padding", 6),
+// CHECK-SAME:   <2 x double>,
+// CHECK-SAME:   float, <3 x half>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %G = type <{
+// CHECK-SAME:   %E, target("dx.Padding", 12),
+// CHECK-SAME:   [1 x float], target("dx.Padding", 12),
+// CHECK-SAME:   [2 x %F],
+// CHECK-SAME:   half
+// CHECK-SAME: }>
+
+// CHECK: %E = type <{ float, target("dx.Padding", 4), double, float, half, i16, i64, i32 }>
 
 cbuffer CBScalars : register(b1, space5) {
   float a1;
@@ -44,8 +128,7 @@ cbuffer CBScalars : register(b1, space5) {
   int64_t a8;
 }
 
-// CHECK: @CBScalars.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars,
-// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48))
+// CHECK: @CBScalars.cb = global target("dx.CBuffer", %__cblayout_CBScalars)
 // CHECK: @a1 = external hidden addrspace(2) global float, align 4
 // CHECK: @a2 = external hidden addrspace(2) global double, align 8
 // CHECK: @a3 = external hidden addrspace(2) global half, align 2
@@ -67,8 +150,7 @@ cbuffer CBVectors {
   // FIXME: add a bool vectors after llvm-project/llvm#91639 is added
 }
 
-// CHECK: @CBVectors.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors,
-// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112))
+// CHECK: @CBVectors.cb = global target("dx.CBuffer", %__cblayout_CBVectors)
 // CHECK: @b1 = external hidden addrspace(2) global <3 x float>, align 16
 // CHECK: @b2 = external hidden addrspace(2) global <3 x double>, align 32
 // CHECK: @b3 = external hidden addrspace(2) global <2 x half>, align 4
@@ -89,16 +171,15 @@ cbuffer CBArrays : register(b2) {
   bool c8[4];
 }
 
-// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays,
-// CHECK-SAME: 708, 0, 48, 112, 176, 224, 608, 624, 656))
-// CHECK: @c1 = external hidden addrspace(2) global [3 x float], align 4
-// CHECK: @c2 = external hidden addrspace(2) global [2 x <3 x double>], align 32
-// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x half]], align 2
-// CHECK: @c4 = external hidden addrspace(2) global [3 x i64], align 8
+// CHECK: @CBArrays.cb = global target("dx.CBuffer", %__cblayout_CBArrays)
+// CHECK: @c1 = external hidden addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
+// CHECK: @c2 = external hidden addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32
+// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], <{ [1 x <{ half, target("dx.Padding", 14) }>], half }> }>, align 2
+// CHECK: @c4 = external hidden addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
 // CHECK: @c5 = external hidden addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
 // CHECK: @c6 = external hidden addrspace(2) global [1 x i16], align 2
-// CHECK: @c7 = external hidden addrspace(2) global [2 x i64], align 8
-// CHECK: @c8 = external hidden addrspace(2) global [4 x i32], align 4
+// CHECK: @c7 = external hidden addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+// CHECK: @c8 = external hidden addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4
 // CHECK: @CBArrays.str = private unnamed_addr constant [9 x i8] c"CBArrays\00", align 1
 
 typedef uint32_t4 uint32_t8[2];
@@ -110,8 +191,7 @@ cbuffer CBTypedefArray : register(space2) {
   T2 t2[2];
 }
 
-// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray,
-// CHECK-SAME: 128, 0, 64))
+// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", %__cblayout_CBTypedefArray)
 // CHECK: @t1 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16
 // CHECK: @t2 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16
 // CHECK: @CBTypedefArray.str = private unnamed_addr constant [15 x i8] c"CBTypedefArray\00", align 1
@@ -135,13 +215,12 @@ struct D {
   Empty es;
 };
 
-// CHECK: @CBStructs.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs,
-// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240))
-// CHECK: @a = external hidden addrspace(2) global target("dx.Layout", %A, 8, 0), align 1
-// CHECK: @b = external hidden addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 1
-// CHECK: @c = external hidden addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 1
-// CHECK: @array_of_A = external hidden addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 1
-// CHECK: @d = external hidden addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 1
+// CHECK: @CBStructs.cb = global target("dx.CBuffer", %__cblayout_CBStructs)
+// CHECK: @a = external hidden addrspace(2) global %A, align 1
+// CHECK: @b = external hidden addrspace(2) global %B, align 1
+// CHECK: @c = external hidden addrspace(2) global %C, align 1
+// CHECK: @array_of_A = external hidden addrspace(2) global <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, align 1
+// CHECK: @d = external hidden addrspace(2) global %__cblayout_D, align 1
 // CHECK: @e = external hidden addrspace(2) global half, align 2
 // CHECK: @f = external hidden addrspace(2) global <3 x i16>, align 8
 // CHECK: @CBStructs.str = private unnamed_addr constant [10 x i8] c"CBStructs\00", align 1
@@ -176,27 +255,25 @@ cbuffer CBClasses {
   K ka[10];
 };
 
-// CHECK: @CBClasses.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses,
-// CHECK-SAME: 260, 0, 16, 32, 112))
-// CHECK: @k = external hidden addrspace(2) global target("dx.Layout", %K, 4, 0), align 1
-// CHECK: @l = external hidden addrspace(2) global target("dx.Layout", %L, 8, 0, 4), align 1
-// CHECK: @m = external hidden addrspace(2) global target("dx.Layout", %M, 68, 0), align 1
-// CHECK: @ka = external hidden addrspace(2) global [10 x target("dx.Layout", %K, 4, 0)], align 1
+// CHECK: @CBClasses.cb = global target("dx.CBuffer", %__cblayout_CBClasses)
+// CHECK: @k = external hidden addrspace(2) global %K, align 1
+// CHECK: @l = external hidden addrspace(2) global %L, align 1
+// CHECK: @m = external hidden addrspace(2) global %M, align 1
+// CHECK: @ka = external hidden addrspace(2) global <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>, align 1
 // CHECK: @CBClasses.str = private unnamed_addr constant [10 x i8] c"CBClasses\00", align 1
 
 struct Test {
     float a, b;
 };
 
-// CHECK: @CBMix.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix,
-// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
-// CHECK: @test = external hidden addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 1
+// CHECK: @CBMix.cb = global target("dx.CBuffer", %__cblayout_CBMix)
+// CHECK: @test = external hidden addrspace(2) global <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, align 1
 // CHECK: @f1 = external hidden addrspace(2) global float, align 4
-// CHECK: @f2 = external hidden addrspace(2) global [3 x [2 x <2 x float>]], align 8
+// CHECK: @f2 = external hidden addrspace(2) global <{ [2 x <{ <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>], <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> }>, align 8
 // CHECK: @f3 = external hidden addrspace(2) global float, align 4
-// CHECK: @f4 = external hidden addrspace(2) global target("dx.Layout", %anon, 4, 0), align 1
+// CHECK: @f4 = external hidden addrspace(2) global %anon, align 1
 // CHECK: @f5 = external hidden addrspace(2) global double, align 8
-// CHECK: @f6 = external hidden addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 1
+// CHECK: @f6 = external hidden addrspace(2) global %anon.0, align 1
 // CHECK: @f7 = external hidden addrspace(2) global float, align 4
 // CHECK: @f8 = external hidden addrspace(2) global <1 x double>, align 8
 // CHECK: @f9 = external hidden addrspace(2) global i16, align 2
@@ -215,7 +292,7 @@ cbuffer CBMix {
     uint16_t f9;
 };
 
-// CHECK: @CB_A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182))
+// CHECK: @CB_A.cb = global target("dx.CBuffer", %__cblayout_CB_A)
 
 cbuffer CB_A {
   double B0[2];
@@ -229,7 +306,7 @@ cbuffer CB_A {
   half3 B8;
 }
 
-// CHECK: @CB_B.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88))
+// CHECK: @CB_B.cb = global target("dx.CBuffer", %__cblayout_CB_B)
 cbuffer CB_B {
   double3 B9[3];
   half3 B10;
@@ -264,7 +341,7 @@ struct G {
   half C3;
 };
 
-// CHECK: @CB_C.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392))
+// CHECK: @CB_C.cb = global target("dx.CBuffer", %__cblayout_CB_C)
 cbuffer CB_C {
   int D0;
   F D1;
@@ -275,63 +352,63 @@ cbuffer CB_C {
 
 // CHECK: define internal void @_init_buffer_CBScalars.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48))
-// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
+// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", %__cblayout_CBScalars)
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBScalarsst(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBScalars) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBVectors.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBVectorss_136_0_16_40_48_80_96_112tt(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) %CBVectors.cb_h, ptr @CBVectors.cb, align 4
+// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", %__cblayout_CBVectors)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBVectorsst(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBVectors) %CBVectors.cb_h, ptr @CBVectors.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBArrays.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656))
-// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
+// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", %__cblayout_CBArrays)
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBArraysst(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBArrays) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBTypedefArray.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBTypedefArrays_128_0_64tt(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4
+// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", %__cblayout_CBTypedefArray)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBTypedefArrayst(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBTypedefArray) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBStructs.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   %CBStructs.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBStructss_246_0_16_32_64_144_238_240tt(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str)
-// CHECK-NEXT:   store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) %CBStructs.cb_h, ptr @CBStructs.cb, align 4
+// CHECK-NEXT:   %CBStructs.cb_h = call target("dx.CBuffer", %__cblayout_CBStructs)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBStructsst(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str)
+// CHECK-NEXT:   store target("dx.CBuffer", %__cblayout_CBStructs) %CBStructs.cb_h, ptr @CBStructs.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBClasses.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBClassess_260_0_16_32_112tt(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) %CBClasses.cb_h, ptr @CBClasses.cb, align 4
+// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", %__cblayout_CBClasses)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBClassesst(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBClasses) %CBClasses.cb_h, ptr @CBClasses.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CBMix.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBMixs_170_0_24_32_120_128_136_144_152_160_168tt(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) %CBMix.cb_h, ptr @CBMix.cb, align 4
+// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", %__cblayout_CBMix)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBMixst(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBMix) %CBMix.cb_h, ptr @CBMix.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CB_A.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_As_188_0_32_76_80_120_128_144_160_182tt(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) %CB_A.cb_h, ptr @CB_A.cb, align 4
+// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", %__cblayout_CB_A)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Ast(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_A) %CB_A.cb_h, ptr @CB_A.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CB_B.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Bs_94_0_88tt(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) %CB_B.cb_h, ptr @CB_B.cb, align 4
+// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", %__cblayout_CB_B)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Bst(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_B) %CB_B.cb_h, ptr @CB_B.cb, align 4
 
 // CHECK: define internal void @_init_buffer_CB_C.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Cs_400_0_16_112_128_392tt(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) %CB_C.cb_h, ptr @CB_C.cb, align 4
+// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", %__cblayout_CB_C)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Cst(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_C) %CB_C.cb_h, ptr @CB_C.cb, align 4
 
 RWBuffer<float> Buf;
 
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
index b7bdce32e6507..1fe0a68261c94 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
@@ -4,18 +4,18 @@
 
 // CHECK: %"n0::n1::__cblayout_A" = type <{ float }>
 // CHECK: %"n0::__cblayout_B" = type <{ float }>
-// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }>
+// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Padding", 12), %"n0::Foo" }>
 // CHECK: %"n0::Foo" = type <{ float }>
 
-// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0))
+// CHECK: @A.cb = global target("dx.CBuffer", %"n0::n1::__cblayout_A")
 // CHECK: @_ZN2n02n11aE = external hidden addrspace(2) global float, align 4
 
-// CHECK: @B.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0))
+// CHECK: @B.cb = global target("dx.CBuffer", %"n0::__cblayout_B")
 // CHECK: @_ZN2n01aE = external hidden addrspace(2) global float, align 4
 
-// CHECK: @C.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16))
+// CHECK: @C.cb = global target("dx.CBuffer", %"n0::n2::__cblayout_C")
 // CHECK: @_ZN2n02n21aE = external hidden addrspace(2) global float, align 4
-// CHECK: external hidden addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 1
+// CHECK: external hidden addrspace(2) global %"n0::Foo", align 1
 
 namespace n0 {
   struct Foo {
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
index 7bedd63c9f65d..c39fd75ec6ee4 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
@@ -2,10 +2,13 @@
 // RUN:   dxil-pc-shadermodel6.3-compute %s \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }>
-// CHECK: %__cblayout_CB_1 = type <{ float, <2 x float> }>
+// TODO: Reordering fields doesn't work...
+// XFAIL: *
 
-// CHECK: @CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK: %__cblayout_CB = type <{ [16 x i8], float, [68 x i8], <2 x i32>, [72 x i8], double }>
+// CHECK: %__cblayout_CB_1 = type <{ [80 x i8], <2 x float>, float }>
+
+// CHECK: @CB.cb = global target("dx.CBuffer", %__cblayout_CB)
 // CHECK: @a = external hidden addrspace(2) global float, align 4
 // CHECK: @b = external hidden addrspace(2) global double, align 8
 // CHECK: @c = external hidden addrspace(2) global <2 x i32>, align 8
@@ -17,7 +20,7 @@ cbuffer CB : register(b1, space3) {
   int2 c : packoffset(c5.z);
 }
 
-// CHECK: @CB.cb.1 = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_1, 92, 88, 80))
+// CHECK: @CB.cb.1 = global target("dx.CBuffer", %__cblayout_CB_1)
 // CHECK: @x = external hidden addrspace(2) global float, align 4
 // CHECK: @y = external hidden addrspace(2) global <2 x float>, align 8
 
@@ -30,7 +33,7 @@ cbuffer CB : register(b0) {
 
 // CHECK: define internal void @_init_buffer_CB.cb()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB)
 // CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, ptr @CB.str)
 
 float foo() {
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
index fa3405df9e3d3..b8c7babb8d634 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
@@ -2,7 +2,7 @@
 
 // CHECK: %__cblayout_A = type <{ float }>
 
-// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0))
+// CHECK: @A.cb = global target("dx.CBuffer", %__cblayout_A)
 // CHECK: @a = external hidden addrspace(2) global float, align 4
 // CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4
 // CHECK-NOT: @B.cb
diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
index ad4d92f8afc02..5333dad962195 100644
--- a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
+++ b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
@@ -1,19 +1,18 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,DXIL
 // RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,SPIRV
 
-// DXIL: %"__cblayout_$Globals" = type <{ float, float, target("dx.Layout", %__cblayout_S, 4, 0) }>
-// SPIRV: %"__cblayout_$Globals" = type <{ float, float, target("spirv.Layout", %__cblayout_S, 4, 0) }>
+// CHECK: %"__cblayout_$Globals" = type <{ float, float, target("{{.*}}.Padding", 8), %__cblayout_S }>
 // CHECK: %__cblayout_S = type <{ float }>
 
-// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 20, 0, 4, 16))
+// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", %"__cblayout_$Globals")
 // DXIL-DAG: @a = external hidden addrspace(2) global float
 // DXIL-DAG: @g = external hidden addrspace(2) global float
-// DXIL-DAG: @h = external hidden addrspace(2) global target("dx.Layout", %__cblayout_S, 4, 0), align 4
+// DXIL-DAG: @h = external hidden addrspace(2) global %__cblayout_S, align 4
 
-// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 20, 0, 4, 16), 2, 0)
+// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0)
 // SPIRV-DAG: @a = external hidden addrspace(12) global float
 // SPIRV-DAG: @g = external hidden addrspace(12) global float
-// SPIRV-DAG: @h = external hidden addrspace(12) global target("spirv.Layout", %__cblayout_S, 4, 0), align 8
+// SPIRV-DAG: @h = external hidden addrspace(12) global %__cblayout_S, align 8
 
 struct EmptyStruct {
 };
diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
index 1b2cb0e99aa83..fee3b77c32dd2 100644
--- a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
+++ b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
@@ -13,6 +13,9 @@
 // CHECK-DAG: @e = external hidden addrspace(2) global <4 x float>, align 16
 // CHECK-DAG: @s = external hidden addrspace(2) global target("dx.Layout", %S, 8, 0), align 1
 
+// TODO: Reordering fields doesn't work...
+// XFAIL: *
+
 struct S {
   float2 v;
 };
diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
index f7fb886e7391d..2094ec61fcdb8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
@@ -53,7 +53,12 @@ static Instruction *findHandleDef(GlobalVariable *HandleVar) {
 }
 
 static bool replaceCBufferAccesses(Module &M) {
-  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+  std::optional<hlsl::CBufferMetadata> CBufMD =
+      hlsl::CBufferMetadata::get(M, [](Type *Ty) {
+        if (auto *TET = dyn_cast<TargetExtType>(Ty))
+          return TET->getName() == "spirv.Padding";
+        return false;
+      });
   if (!CBufMD)
     return false;
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 704edd3139260..896b5232beb3a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -832,9 +832,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
         if (Ty->isArrayTy())
           Ty = Ty->getArrayElementType();
         else {
-          TargetExtType *BufferTy = cast<TargetExtType>(Ty);
-          assert(BufferTy->getTargetExtName() == "spirv.Layout");
-          Ty = BufferTy->getTypeParameter(0);
           assert(Ty && Ty->isStructTy());
           uint32_t Index = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
           Ty = cast<StructType>(Ty)->getElementType(Index);
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
index 4d32e66d017c9..cbc038329618c 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
@@ -21,7 +21,7 @@
 %MyStruct = type { <4 x float> }
 %__cblayout_MyCBuffer = type <{ %MyStruct, <4 x float> }>
 
- at MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) poison
+ at MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison
 @s = external hidden local_unnamed_addr addrspace(12) global %MyStruct, align 16
 @v = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16
 @MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1
@@ -30,10 +30,10 @@
 define void @main() {
 entry:
 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject %[[wrapper_ptr_t]] %[[MyCBuffer]]
-  %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_MyCBuffers_32_0_16t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8
+  %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8
   %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_3t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
-  
+
 ; CHECK: %[[tmp_ptr:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[tmp]] %[[uint_0]] %[[uint_0]]
 ; CHECK: %[[v_ptr:.+]] = OpAccessChain %[[_ptr_Uniform_v4float]] %[[tmp]] %[[uint_0]] %[[uint_1]]
 ; CHECK: %[[s_ptr_gep:[0-9]+]] = OpInBoundsAccessChain %[[_ptr_Uniform_float]] %[[tmp_ptr]] %[[uint_0]] %[[uint_1]]
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
index c365452a9b404..670548d3d3e27 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
@@ -14,12 +14,12 @@
 %__cblayout_PartiallyUsedCBuffer = type <{ float, i32 }>
 %__cblayout_AnotherCBuffer = type <{ <4 x float>, <4 x float> }>
 
- at UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) poison
+ at UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) poison
 @UnusedCBuffer.str = private unnamed_addr constant [14 x i8] c"UnusedCBuffer\00", align 1
- at PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) poison
+ at PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) poison
 @used_member = external hidden local_unnamed_addr addrspace(12) global float, align 4
 @PartiallyUsedCBuffer.str = private unnamed_addr constant [21 x i8] c"PartiallyUsedCBuffer\00", align 1
- at AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) poison
+ at AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) poison
 @a = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16
 @AnotherCBuffer.str = private unnamed_addr constant [15 x i8] c"AnotherCBuffer\00", align 1
 @.str = private unnamed_addr constant [7 x i8] c"output\00", align 1
@@ -28,18 +28,18 @@
 ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none)
 define void @main() local_unnamed_addr #1 {
 entry:
-  %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_UnusedCBuffers_4_0t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8
+  %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8
 
 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[PartiallyUsedCBuffer]]
 ; CHECK: %[[used_member_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0:[0-9]+]]
-  %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_PartiallyUsedCBuffers_8_0_4t_2_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8
+  %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8
 
 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[AnotherCBuffer]]
 ; CHECK: %[[a_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0]]
-  %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_AnotherCBuffers_32_0_16t_2_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str)
-  store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8
+  %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str)
+  store target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8
   %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32 3, i32 0, i32 1, i32 0, ptr nonnull @.str)
 
   %2 = load float, ptr addrspace(12) @used_member, align 4

>From c73d2a3d439776686788d7c84a5b04bf09f9d34f Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Fri, 26 Sep 2025 15:00:52 -0700
Subject: [PATCH 8/8] [DirectX] Drop dx.Layout extension type

TODO: This must go in after the frontend changes
---
 llvm/include/llvm/Analysis/DXILResource.h     | 21 -----
 llvm/lib/Analysis/DXILResource.cpp            | 13 ---
 .../DirectX/Metadata/cbuffer-layouttype.ll    | 82 -------------------
 3 files changed, 116 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll

diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index c7aff167324e6..bcbca78504041 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -222,27 +222,6 @@ class AnyResourceExtType : public TargetExtType {
   }
 };
 
-/// The dx.Layout target extension type
-///
-/// `target("dx.Layout", <Type>, <size>, [offsets...])`
-class LayoutExtType : public TargetExtType {
-public:
-  LayoutExtType() = delete;
-  LayoutExtType(const LayoutExtType &) = delete;
-  LayoutExtType &operator=(const LayoutExtType &) = delete;
-
-  Type *getWrappedType() const { return getTypeParameter(0); }
-  uint32_t getSize() const { return getIntParameter(0); }
-  uint32_t getOffsetOfElement(int I) const { return getIntParameter(I + 1); }
-
-  static bool classof(const TargetExtType *T) {
-    return T->getName() == "dx.Layout";
-  }
-  static bool classof(const Type *T) {
-    return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T));
-  }
-};
-
 /// The dx.Padding target extension type
 ///
 /// `target("dx.Padding", NumBytes)`
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 6f19a68dcd194..14d9925b4297a 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -380,13 +380,6 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
       Name.append(CBufferName);
     }
 
-    // TODO: Remove this when we update the frontend to use explicit padding.
-    if (LayoutExtType *LayoutType =
-            dyn_cast<LayoutExtType>(RTy->getResourceType())) {
-      StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
-      return StructType::create(Ty->elements(), Name);
-    }
-
     return getOrCreateElementStruct(
         getTypeWithoutPadding(RTy->getResourceType()), Name);
   }
@@ -498,13 +491,7 @@ ResourceTypeInfo::UAVInfo ResourceTypeInfo::getUAV() const {
 
 uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const {
   assert(isCBuffer() && "Not a CBuffer");
-
   Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType();
-
-  // TODO: Remove this when we update the frontend to use explicit padding.
-  if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy))
-    return LayoutTy->getSize();
-
   return DL.getTypeAllocSize(ElTy);
 }
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
deleted file mode 100644
index 85952c9ae4e83..0000000000000
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; TODO: Remove this test once we've updated the frontend to use explicit
-; padding. The cbuffer-metadata.ll test covers the newer logic.
-
-; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
-; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
-; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
-
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.6-compute"
-
-%__cblayout_CB1 = type <{ float, i32, double, <2 x i32> }>
- at CB1.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16)) poison
- at CB1.str = private unnamed_addr constant [4 x i8] c"CB1\00", align 1
-
-%__cblayout_CB2 = type <{ float, double, float, half, i16, i64, i32 }>
- at CB2.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32)) poison
- at CB2.str = private unnamed_addr constant [4 x i8] c"CB2\00", align 1
-
-%__cblayout_MyConstants = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }>
- at MyConstants.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)) poison
- at MyConstants.str = private unnamed_addr constant [12 x i8] c"MyConstants\00", align 1
-
-; PRINT:; Resource Bindings:
-; PRINT-NEXT:;
-; PRINT-NEXT:; Name            Type  Format  Dim   ID    HLSL Bind  Count
-; PRINT-NEXT:; ----
-; PRINT-NEXT:; CB1          cbuffer      NA   NA  CB0          cb0     1
-; PRINT-NEXT:; CB2          cbuffer      NA   NA  CB1          cb1     1
-; PRINT-NEXT:; MyConstants  cbuffer      NA   NA  CB2  cb5,space15     1
-
-define void @test() #0 {
-
-  ; cbuffer CB1 : register(b0) {
-  ;   float a;
-  ;   int b;
-  ;   double c;
-  ;   int2 d;
-  ; }
-  %CB1.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16))
-            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr @CB1.str)
-  ; cbuffer CB2 : register(b0) {
-  ;   float a;
-  ;   double b;
-  ;   float c;
-  ;   half d;
-  ;   uint16_t e;
-  ;   int64_t f;
-  ;   int g;
-  ;}
-
-  %CB2.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32))
-            @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr @CB2.str)
-  ; cbuffer CB3 : register(b5) {
-  ;   double B0;
-  ;   float3 B1;
-  ;   float B2;
-  ;   double3 B3;
-  ;   half B4;
-  ;   double2 B5;
-  ;   float B6;
-  ;   half3 B7;
-  ;   half3 B8;
-  ; }
-  %CB3.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90))
-            @llvm.dx.resource.handlefrombinding(i32 15, i32 5, i32 1, i32 0, ptr @MyConstants.str)
-
-  ret void
-}
-
-attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
-
-; CHECK: @CB1 = external constant %CBuffer.CB1
-; CHECK: @CB2 = external constant %CBuffer.CB2
-; CHECK: @MyConstants = external constant %CBuffer.MyConstants
-
-; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]}
-
-; CHECK: [[ResList]] = !{null, null, [[CBList:[!][0-9]+]], null}
-; CHECK: [[CBList]] = !{![[CB1:[0-9]+]], ![[CB2:[0-9]+]], ![[MYCONSTANTS:[0-9]+]]}
-; CHECK: ![[CB1]] = !{i32 0, ptr @CB1, !"CB1", i32 0, i32 0, i32 1, i32 24, null}
-; CHECK: ![[CB2]] = !{i32 1, ptr @CB2, !"CB2", i32 0, i32 1, i32 1, i32 36, null}
-; CHECK: ![[MYCONSTANTS]] = !{i32 2, ptr @MyConstants, !"MyConstants", i32 15, i32 5, i32 1, i32 96, null}



More information about the llvm-commits mailing list