[clang] [llvm] Explicit types in cbuffer layouts (PR #156919)
Justin Bogner via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 22 09:46:39 PDT 2025
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/156919
>From 9a77087836ec47abed259c2bd1027041cdcb3754 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 21 Oct 2025 10:23:44 -0700
Subject: [PATCH 1/6] [HLSL] Allow completely unused cbuffers
We were checking for cbuffers where the global was removed, but if the
buffer is completely unused the whole thing can be null.
---
llvm/lib/Frontend/HLSL/CBuffer.cpp | 9 +++++++--
llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll | 13 +++++++++++++
2 files changed, 20 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index 407b6ad6d5a7e..1e4a1a5020b0b 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -43,8 +43,13 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
for (const MDNode *MD : CBufMD->operands()) {
assert(MD->getNumOperands() && "Invalid cbuffer metadata");
- auto *Handle = cast<GlobalVariable>(
- cast<ValueAsMetadata>(MD->getOperand(0))->getValue());
+ // For an unused cbuffer, the handle may have been optimized out
+ Metadata *OpMD = MD->getOperand(0);
+ if (!OpMD)
+ continue;
+
+ auto *Handle =
+ cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle);
for (int I = 1, E = MD->getNumOperands(); I < E; ++I) {
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll
new file mode 100644
index 0000000000000..8c0d82e43b4b1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll
@@ -0,0 +1,13 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+; Check that we correctly ignore cbuffers that were nulled out by optimizations.
+
+%__cblayout_CB = type <{ float }>
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+ at x = external local_unnamed_addr addrspace(2) global float, align 4
+
+; CHECK-NOT: !hlsl.cbs =
+!hlsl.cbs = !{!0, !1, !2}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @x}
+!1 = !{ptr @CB.cb, null}
+!2 = !{null, null}
>From 8f23fff7a06c0cb702b2b537b6a8d9787dbaa3e6 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Mon, 20 Oct 2025 16:38:04 -0700
Subject: [PATCH 2/6] [SPIRV][HLSL] Fix assert with cbuffers through constexpr
The comment here pointed out that RAUW would fall over given a
constantexpr, but then proceeded to just do what RAUW does by hand,
which falls over in the same way. Instead, convert constantexprs
involving cbuffer globals to instructions before processing them.
The test update just modifies the existing cbuffer test, since it
implied it was trying to test this exact case anyway.
---
llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp | 14 ++++++++------
llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll | 6 ++----
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
index f7fb886e7391d..3ca0b40cac93e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ReplaceConstant.h"
#define DEBUG_TYPE "spirv-cbuffer-access"
using namespace llvm;
@@ -57,6 +58,12 @@ static bool replaceCBufferAccesses(Module &M) {
if (!CBufMD)
return false;
+ SmallVector<Constant *> CBufferGlobals;
+ for (const hlsl::CBufferMapping &Mapping : *CBufMD)
+ for (const hlsl::CBufferMember &Member : Mapping.Members)
+ CBufferGlobals.push_back(Member.GV);
+ convertUsersOfConstantsToInstructions(CBufferGlobals);
+
for (const hlsl::CBufferMapping &Mapping : *CBufMD) {
Instruction *HandleDef = findHandleDef(Mapping.Handle);
if (!HandleDef) {
@@ -80,12 +87,7 @@ static bool replaceCBufferAccesses(Module &M) {
Value *GetPointerCall = Builder.CreateIntrinsic(
PtrType, Intrinsic::spv_resource_getpointer, {HandleDef, IndexVal});
- // We cannot use replaceAllUsesWith here because some uses may be
- // ConstantExprs, which cannot be replaced with non-constants.
- SmallVector<User *, 4> Users(MemberGV->users());
- for (User *U : Users) {
- U->replaceUsesOfWith(MemberGV, GetPointerCall);
- }
+ MemberGV->replaceAllUsesWith(GetPointerCall);
}
}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
index 4d32e66d017c9..6d41875798ebc 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
@@ -1,5 +1,5 @@
; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-vulkan1.3-library %s -o - | FileCheck %s
-; Test that uses of cbuffer members inside ConstantExprs are handled correctly.
+; Test that uses of cbuffer members are handled correctly.
; CHECK-DAG: OpDecorate %[[MyCBuffer:[0-9]+]] DescriptorSet 0
; CHECK-DAG: OpDecorate %[[MyCBuffer]] Binding 0
@@ -37,10 +37,8 @@ entry:
; CHECK: %[[tmp_ptr:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[tmp]] %[[uint_0]] %[[uint_0]]
; CHECK: %[[v_ptr:.+]] = OpAccessChain %[[_ptr_Uniform_v4float]] %[[tmp]] %[[uint_0]] %[[uint_1]]
; CHECK: %[[s_ptr_gep:[0-9]+]] = OpInBoundsAccessChain %[[_ptr_Uniform_float]] %[[tmp_ptr]] %[[uint_0]] %[[uint_1]]
- %gep = getelementptr inbounds %MyStruct, ptr addrspace(12) @s, i32 0, i32 0, i32 1
-
; CHECK: %[[s_val:.+]] = OpLoad %[[float]] %[[s_ptr_gep]]
- %load_from_gep = load float, ptr addrspace(12) %gep, align 4
+ %load_from_gep = load float, ptr addrspace(12) getelementptr inbounds (%MyStruct, ptr addrspace(12) @s, i32 0, i32 0, i32 1), align 4
; CHECK: %[[v_val:.+]] = OpLoad %[[v4float]] %[[v_ptr]]
%load_v = load <4 x float>, ptr addrspace(12) @v, align 16
>From df309582f530160b343e4278cbf9c31accb18267 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Mon, 20 Oct 2025 21:21:20 -0700
Subject: [PATCH 3/6] [DirectX] Teach DXILResourceAccess about cbuffers
This isn't reachable today but will come into play once we reorder
passes for #147352 and #147351.
---
.../lib/Target/DirectX/DXILResourceAccess.cpp | 152 ++++++++++++++++--
.../load-cbuffer-array-of-struct.ll | 59 +++++++
.../load-cbuffer-array-of-vector.ll | 45 ++++++
.../load-cbuffer-array-typedgep.ll | 27 ++++
.../ResourceAccess/load-cbuffer-arrays.ll | 129 +++++++++++++++
.../load-cbuffer-dynamic-struct.ll | 64 ++++++++
.../ResourceAccess/load-cbuffer-dynamic.ll | 42 +++++
.../ResourceAccess/load-cbuffer-scalars.ll | 87 ++++++++++
.../ResourceAccess/load-cbuffer-vectors.ll | 109 +++++++++++++
9 files changed, 704 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 6579d3405cf39..e467e3c8a218c 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
#include "DirectX.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DXILResource.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
APInt Scaled = ConstantOffset.udiv(ScalarSize);
- return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
+ return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
}
- auto IndexIt = GEP->idx_begin();
- assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
- "GEP is not indexing through pointer");
- ++IndexIt;
- Value *Offset = *IndexIt;
- assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
- return Offset;
+ unsigned NumIndices = GEP->getNumIndices();
+
+ // If we have a single index we're indexing into a top level array. This
+ // generally only happens with cbuffers.
+ if (NumIndices == 1)
+ return *GEP->idx_begin();
+
+ // If we have two indices, this should be a simple access through a pointer.
+ if (NumIndices == 2) {
+ auto IndexIt = GEP->idx_begin();
+ assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+ "GEP is not indexing through pointer");
+ ++IndexIt;
+ Value *Offset = *IndexIt;
+ assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+ return Offset;
+ }
+
+ llvm_unreachable("Unhandled GEP structure for resource access");
}
static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
LI->replaceAllUsesWith(V);
}
+namespace {
+/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
+struct CBufferRowIntrin {
+ Intrinsic::ID IID;
+ Type *RetTy;
+ unsigned int EltSize;
+ unsigned int NumElts;
+
+ CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+ assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+ switch (DL.getTypeSizeInBits(Ty)) {
+ case 16:
+ IID = Intrinsic::dx_resource_load_cbufferrow_8;
+ RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+ EltSize = 2;
+ NumElts = 8;
+ break;
+ case 32:
+ IID = Intrinsic::dx_resource_load_cbufferrow_4;
+ RetTy = StructType::get(Ty, Ty, Ty, Ty);
+ EltSize = 4;
+ NumElts = 4;
+ break;
+ case 64:
+ IID = Intrinsic::dx_resource_load_cbufferrow_2;
+ RetTy = StructType::get(Ty, Ty);
+ EltSize = 8;
+ NumElts = 2;
+ break;
+ default:
+ llvm_unreachable("Only 16, 32, and 64 bit types supported");
+ }
+ }
+};
+} // namespace
+
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+ dxil::ResourceTypeInfo &RTI) {
+ const DataLayout &DL = LI->getDataLayout();
+
+ Type *Ty = LI->getType();
+ assert(!isa<StructType>(Ty) && "Structs not handled yet");
+ CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+
+ StringRef Name = LI->getName();
+ Value *Handle = II->getOperand(0);
+
+ IRBuilder<> Builder(LI);
+
+ ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
+ assert(GlobalOffset && "CBuffer getpointer index must be constant");
+
+ unsigned int FixedOffset = GlobalOffset->getZExtValue();
+ // If we have a further constant offset we can just fold it in to the fixed
+ // offset.
+ if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+ FixedOffset += ConstOffset->getZExtValue();
+ Offset = nullptr;
+ }
+
+ Value *CurrentRow = ConstantInt::get(
+ Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
+ unsigned int CurrentIndex =
+ (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+ assert(!(CurrentIndex && Offset) &&
+ "Dynamic indexing into elements of cbuffer rows is not supported");
+ if (Offset)
+ CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+
+ auto *CBufLoad = Builder.CreateIntrinsic(
+ Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
+ auto *Elt =
+ Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
+
+ unsigned int Remaining =
+ ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+ if (Remaining == 0) {
+ // We only have a single element, so we're done.
+ Value *Result = Elt;
+
+ // However, if we loaded a <1 x T>, then we need to adjust the type.
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+ Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+ Builder.getInt32(0), Name);
+ }
+ LI->replaceAllUsesWith(Result);
+ return;
+ }
+
+ // Walk each element and extract it, wrapping to new rows as needed.
+ SmallVector<Value *> Extracts{Elt};
+ while (Remaining--) {
+ CurrentIndex %= Intrin.NumElts;
+
+ if (CurrentIndex == 0) {
+ CurrentRow = Builder.CreateAdd(CurrentRow,
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+ CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
+ {Handle, CurrentRow}, nullptr,
+ Name + ".load");
+ }
+
+ Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+ Name + ".extract"));
+ }
+
+ // Finally, we build up the original loaded value.
+ Value *Result = PoisonValue::get(Ty);
+ for (int I = 0, E = Extracts.size(); I < E; ++I)
+ Result = Builder.CreateInsertElement(
+ Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
+ LI->replaceAllUsesWith(Result);
+}
+
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
@@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
return createRawLoad(II, LI, Offset);
+ case dxil::ResourceKind::CBuffer:
+ return createCBufferLoad(II, LI, Offset, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::TextureCubeArray:
case dxil::ResourceKind::FeedbackTexture2D:
case dxil::ResourceKind::FeedbackTexture2DArray:
- case dxil::ResourceKind::CBuffer:
case dxil::ResourceKind::TBuffer:
- // TODO: handle these
+ reportFatalUsageError("Load not yet implemented for resource type");
return;
case dxil::ResourceKind::Sampler:
case dxil::ResourceKind::RTAccelerationStructure:
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
new file mode 100644
index 0000000000000..22fba8c1d5f8c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Tests for indexed types in dynamically indexed arrays in cbuffers.
+;
+; struct S {
+; float x[2];
+; uint q;
+; };
+; cbuffer CB : register(b0) {
+; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3
+; S v[3]; // offset 48, size 24 (+8) * 3
+; }
+%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }>
+%__cblayout_CB = type <{
+ <{
+ [2 x <{ <3 x i32>, target("dx.Padding", 4) }>],
+ <3 x i32>
+ }>,
+ target("dx.Padding", 4),
+ <{
+ [2 x <{ %S, target("dx.Padding", 8) }>], %S
+ }>
+}>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst, i32 %idx) {
+entry:
+ %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ;; w[2].z
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: store i32 [[X]], ptr %dst
+ %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40
+ %w_load = load i32, ptr addrspace(2) %w_gep, align 4
+ store i32 %w_load, ptr %dst, align 4
+
+ ;; v[2].q
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ ; CHECK: store i32 [[X]], ptr [[PTR]]
+ %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84
+ %v_load = load i32, ptr addrspace(2) %v_gep, align 4
+ %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ store i32 %v_load, ptr %v.i, align 4
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll
new file mode 100644
index 0000000000000..cbd76fc00b813
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Test for when we have indices into both the array and the vector: ie, s[1][3]
+
+; cbuffer CB : register(b0) {
+; uint4 s[2]; // offset 0, size 16 * 2
+; }
+%__cblayout_CB = type <{ [2 x <4 x i32>] }>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ; CHECK: store i32 [[X]], ptr %dst
+ %i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28
+ %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4
+ store i32 %i8_vecext, ptr %dst, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ;;
+ ;; It would be nice to avoid the redundant vector creation here, but that's
+ ;; outside of the scope of this pass.
+ ;;
+ ; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3
+ ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3
+ ; CHECK: store i32 [[X_EXT]], ptr %dst
+ %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1
+ %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16
+ %typed_vecext = extractelement <4 x i32> %typed_load, i32 3
+ store i32 %typed_vecext, ptr %dst, align 4
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll
new file mode 100644
index 0000000000000..a561d8c0aea5d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+; float a1[3];
+; }
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
+
+ at CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+ ; CHECK: store float [[X]], ptr %dst
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
+ %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1
+ %a1 = load float, ptr addrspace(2) %a1_gep, align 4
+ store float %a1, ptr %dst, align 32
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll
new file mode 100644
index 0000000000000..68dfbb184083f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll
@@ -0,0 +1,129 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+; float a1[3]; // offset 0, size 4 (+12) * 3
+; double3 a2[2]; // offset 48, size 24 (+8) * 2
+; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4
+; uint64_t a4[3]; // offset 176, size 8 (+8) * 3
+; int4 a5[2][3][4]; // offset 224, size 16 * 24
+; uint16_t a6[1]; // offset 608, size 2 (+14) * 1
+; int64_t a7[2]; // offset 624, size 8 (+8) * 2
+; bool a8[4]; // offset 656, size 4 (+12) * 4
+; }
+%__cblayout_CB = type <{
+ <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+ <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+ <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14),
+ <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+ [24 x <4 x i32>],
+ [1 x i16], target("dx.Padding", 14),
+ <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+ <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+}>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+ ; CHECK: store float [[X]], ptr %dst
+ %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %a1_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a1_ptr, i32 16
+ %a1 = load float, ptr addrspace(2) %a1_gep, align 4
+ store float %a1, ptr %dst, align 32
+
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
+ ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
+ ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
+ ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
+ %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %a2_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a2_ptr, i32 32
+ %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 8
+ %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
+ store <3 x double> %a2, ptr %a2.i, align 32
+
+ ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
+ ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32
+ ; CHECK: store half [[X]], ptr [[PTR]]
+ %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 112)
+ %a3_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a3_ptr, i32 16
+ %a3 = load half, ptr addrspace(2) %a3_gep, align 2
+ %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32
+ store half %a3, ptr %a3.i, align 2
+
+ ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12)
+ ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
+ ; CHECK: store i64 [[X]], ptr [[PTR]]
+ %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 176)
+ %a4_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a4_ptr, i32 16
+ %a4 = load i64, ptr addrspace(2) %a4_gep, align 8
+ %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
+ store i64 %a4, ptr %a4.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
+ ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
+ ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
+ %a5_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 224)
+ %a5_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a5_ptr, i32 192
+ %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 4
+ %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
+ store <4 x i32> %a5, ptr %a5.i, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 38)
+ ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64
+ ; CHECK: store i16 [[X]], ptr [[PTR]]
+ %a6_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 608)
+ %a6 = load i16, ptr addrspace(2) %a6_ptr, align 2
+ %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64
+ store i16 %a6, ptr %a6.i, align 2
+
+ ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 40)
+ ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
+ ; CHECK: store i64 [[X]], ptr [[PTR]]
+ %a7_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 624)
+ %a7_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a7_ptr, i32 16
+ %a7 = load i64, ptr addrspace(2) %a7_gep, align 8
+ %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
+ store i64 %a7, ptr %a7.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 42)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80
+ ; CHECK: store i32 [[X]], ptr [[PTR]]
+ %a8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 656)
+ %a8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a8_ptr, i32 16
+ %a8 = load i32, ptr addrspace(2) %a8_gep, align 4, !range !0, !noundef !1
+ %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80
+ store i32 %a8, ptr %a8.i, align 4
+
+ ret void
+}
+
+!0 = !{i32 0, i32 2}
+!1 = !{}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll
new file mode 100644
index 0000000000000..9c2ec2399d8d5
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic-struct.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Tests for indexed types in dynamically indexed arrays in cbuffers.
+;
+; Bug https://github.com/llvm/llvm-project/issues/164517
+; XFAIL: *
+;
+; struct S {
+; float x[2];
+; uint q;
+; };
+; cbuffer CB : register(b0) {
+; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3
+; S v[3]; // offset 48, size 24 (+8) * 3
+; }
+%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }>
+%__cblayout_CB = type <{
+ <{
+ [2 x <{ <3 x i32>, target("dx.Padding", 4) }>],
+ <3 x i32>
+ }>,
+ target("dx.Padding", 4),
+ <{
+ [2 x <{ %S, target("dx.Padding", 8) }>], %S
+ }>
+}>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst, i32 %idx) {
+entry:
+ %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ;; w[Tid.x].z
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: store i32 [[X]], ptr %dst
+ %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %w_arrayidx = getelementptr <3 x i32>, ptr addrspace(2) %w_ptr, i32 %idx
+ %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_arrayidx, i32 4
+ %w_load = load i32, ptr addrspace(2) %w_gep, align 4
+ store i32 %w_load, ptr %dst, align 4
+
+ ;; v[Tid.x].q
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ ; CHECK: store i32 [[X]], ptr [[PTR]]
+ %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %v_arrayidx = getelementptr <{ %S, target("dx.Padding", 8) }>, ptr addrspace(2) %v_ptr, i32 %idx
+ %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_arrayidx, i32 8
+ %v_load = load i32, ptr addrspace(2) %v_gep, align 4
+ %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ store i32 %v_load, ptr %v.i, align 4
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll
new file mode 100644
index 0000000000000..1b486a1f40f22
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Tests for dynamic indices into arrays in cbuffers.
+
+; cbuffer CB : register(b0) {
+; uint s[10]; // offset 0, size 4 (+12) * 10
+; uint t[10]; // offset 160, size 4 (+12) * 10
+; }
+%__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst, i32 %idx) {
+entry:
+ %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: store i32 [[X]], ptr %dst
+ %s_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %s_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %s_ptr, i32 %idx
+ %s_load = load i32, ptr addrspace(2) %s_gep, align 4
+ store i32 %s_load, ptr %dst, align 4
+
+ ; CHECK: [[T_IDX:%.*]] = add i32 10, %idx
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 [[T_IDX]])
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ ; CHECK: store i32 [[X]], ptr [[PTR]]
+ %t_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 160)
+ %t_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %t_ptr, i32 %idx
+ %t_load = load i32, ptr addrspace(2) %t_gep, align 4
+ %t.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ store i32 %t_load, ptr %t.i, align 4
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll
new file mode 100644
index 0000000000000..5a8275b4a6c12
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-scalars.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB {
+; float a1; // offset 0, size 4
+; int a2; // offset 4, size 4
+; bool a3; // offset 8, size 4
+; float16_t a4; // offset 12, size 2
+; uint16_t a5; // offset 14, size 2
+; double a6; // offset 16, size 8
+; int64_t a7; // offset 24, size 8
+; }
+%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
+ ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+ ; CHECK: store float [[A1]], ptr %dst
+ %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %a1 = load float, ptr addrspace(2) %a1_ptr, align 4
+ store float %a1, ptr %dst, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
+ ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ ; CHECK: store i32 [[A2]], ptr [[PTR]]
+ %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 4)
+ %a2 = load i32, ptr addrspace(2) %a2_ptr, align 4
+ %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ store i32 %a2, ptr %a2.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
+ ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
+ ; CHECK: store i32 [[A3]], ptr [[PTR]]
+ %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 8)
+ %a3 = load i32, ptr addrspace(2) %a3_ptr, align 4
+ %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
+ store i32 %a3, ptr %a3.i, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
+ ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12
+ ; CHECK: store half [[A4]], ptr [[PTR]]
+ %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 12)
+ %a4 = load half, ptr addrspace(2) %a4_ptr, align 2
+ %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12
+ store half %a4, ptr %a4.i, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
+ ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14
+ ; CHECK: store i16 [[A5]], ptr [[PTR]]
+ %a5_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 14)
+ %a5 = load i16, ptr addrspace(2) %a5_ptr, align 2
+ %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14
+ store i16 %a5, ptr %a5.i, align 2
+
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
+ ; CHECK: store double [[A6]], ptr [[PTR]]
+ %a6_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16)
+ %a6 = load double, ptr addrspace(2) %a6_ptr, align 8
+ %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
+ store double %a6, ptr %a6.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24
+ ; CHECK: store i64 [[A7]], ptr [[PTR]]
+ %a7_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 24)
+ %a7 = load i64, ptr addrspace(2) %a7_ptr, align 8
+ %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24
+ store i64 %a7, ptr %a7.i, align 8
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll
new file mode 100644
index 0000000000000..1f20d0101c151
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll
@@ -0,0 +1,109 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB {
+; float3 a1; // offset 0, size 12 (+4)
+; double3 a2; // offset 16, size 24
+; float16_t2 a3; // offset 40, size 4 (+4)
+; uint64_t3 a4; // offset 48, size 24 (+8)
+; int4 a5; // offset 80, size 16
+; uint16_t3 a6; // offset 96, size 6 (+10)
+; };
+%__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16> }>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
+ ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1
+ ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2
+ ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2
+ ; CHECK: store <3 x float> [[VEC2]], ptr %dst
+ %a1_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %a1 = load <3 x float>, ptr addrspace(2) %a1_gep, align 16
+ store <3 x float> %a1, ptr %dst, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
+ ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
+ ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
+ %a2_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16)
+ %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 32
+ %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
+ store <3 x double> %a2, ptr %a2.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
+ ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4
+ ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5
+ ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
+ ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]]
+ %a3_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 40)
+ %a3 = load <2 x half>, ptr addrspace(2) %a3_gep, align 4
+ %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
+ store <2 x half> %a3, ptr %a3.i, align 2
+
+ ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3)
+ ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
+ ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 4)
+ ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+ ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
+ ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]]
+ %a4_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %a4 = load <3 x i64>, ptr addrspace(2) %a4_gep, align 32
+ %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
+ store <3 x i64> %a4, ptr %a4.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
+ ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
+ ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
+ %a5_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 80)
+ %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 16
+ %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
+ store <4 x i32> %a5, ptr %a5.i, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
+ ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1
+ ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2
+ ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88
+ ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]]
+ %a6_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 96)
+ %a6 = load <3 x i16>, ptr addrspace(2) %a6_gep, align 8
+ %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88
+ store <3 x i16> %a6, ptr %a6.i, align 2
+
+ ret void
+}
>From 5cb35eb91470f2bf3b8b70c92c2a4bc5c19a78e9 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 9 Sep 2025 14:08:37 -0700
Subject: [PATCH 4/6] [DirectX] Fix crash when naming buffers of arrays
DXILResource was falling over trying to name a resource type that
contained an array, such as `StructuredBuffer<float[3][2]>`. Handle this
by walking through array types to gather the dimensions.
---
llvm/lib/Analysis/DXILResource.cpp | 8 ++++++++
llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll | 9 ++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index f9bf09262dd1f..6f19a68dcd194 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -255,6 +255,12 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name,
if (!ContainedType)
return;
+ SmallVector<uint64_t> ArrayDimensions;
+ while (ArrayType *AT = dyn_cast<ArrayType>(ContainedType)) {
+ ArrayDimensions.push_back(AT->getNumElements());
+ ContainedType = AT->getElementType();
+ }
+
StringRef ElementName;
ElementType ET = toDXILElementType(ContainedType, IsSigned);
if (ET != ElementType::Invalid) {
@@ -271,6 +277,8 @@ static void formatTypeName(SmallString<64> &Dest, StringRef Name,
DestStream << "<" << ElementName;
if (const FixedVectorType *VTy = dyn_cast<FixedVectorType>(ContainedType))
DestStream << VTy->getNumElements();
+ for (uint64_t Dim : ArrayDimensions)
+ DestStream << "[" << Dim << "]";
DestStream << ">";
}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
index 4f13f4789cd66..56798c8382d45 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/resource-symbols.ll
@@ -28,6 +28,11 @@ define void @test() {
@llvm.dx.resource.handlefrombinding(i32 0, i32 10, i32 1, i32 0, ptr @SB.str)
; CHECK: %"StructuredBuffer<struct.S>" = type { %struct.S }
+ ; StructuredBuffer<float[3][2]>
+ %struct1 = call target("dx.RawBuffer", [3 x [2 x float]], 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 12, i32 1, i32 0, ptr null)
+ ; CHECK: %"StructuredBuffer<float[3][2]>" = type { [3 x [2 x float]] }
+
; ByteAddressBuffer
%byteaddr = call target("dx.RawBuffer", i8, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 20, i32 1, i32 0, ptr null)
@@ -40,12 +45,14 @@ define void @test() {
; CHECK-NEXT: @[[T1:.*]] = external constant %"Buffer<int32_t>"
; CHECK-NEXT: @[[T2:.*]] = external constant %"Buffer<uint32_t3>"
; CHECK-NEXT: @[[S0:.*]] = external constant %"StructuredBuffer<struct.S>"
+; CHECK-NEXT: @[[S1:.*]] = external constant %"StructuredBuffer<float[3][2]>"
; CHECK-NEXT: @[[B0:.*]] = external constant %ByteAddressBuffer
; CHECK: !{i32 0, ptr @[[T0]], !"A"
; CHECK: !{i32 1, ptr @[[T1]], !""
; CHECK: !{i32 2, ptr @[[T2]], !""
; CHECK: !{i32 3, ptr @[[S0]], !"SB"
-; CHECK: !{i32 4, ptr @[[B0]], !""
+; CHECK: !{i32 4, ptr @[[S1]], !""
+; CHECK: !{i32 5, ptr @[[B0]], !""
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
>From a67e0333d906d512ab0a2457c749751ee663de04 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 11 Jun 2025 16:21:41 -0400
Subject: [PATCH 5/6] wip: Use explicit padding
Fixes #147352
---
clang/lib/CodeGen/CGExpr.cpp | 26 ++
clang/lib/CodeGen/CGExprAgg.cpp | 4 +
clang/lib/CodeGen/CGHLSLRuntime.cpp | 172 +++++++++-
clang/lib/CodeGen/CGHLSLRuntime.h | 23 +-
clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp | 270 ++++-----------
clang/lib/CodeGen/HLSLBufferLayoutBuilder.h | 47 ++-
clang/lib/CodeGen/TargetInfo.h | 14 +
clang/lib/CodeGen/Targets/DirectX.cpp | 20 +-
clang/lib/CodeGen/Targets/SPIR.cpp | 21 +-
clang/test/CodeGenHLSL/ArrayAssignable.hlsl | 70 ++--
.../GlobalConstructorFunction.hlsl | 6 +-
clang/test/CodeGenHLSL/resources/cbuffer.hlsl | 251 +++++++++-----
.../resources/cbuffer_and_namespaces.hlsl | 10 +-
.../resources/cbuffer_with_packoffset.hlsl | 13 +-
...uffer_with_static_global_and_function.hlsl | 2 +-
.../resources/default_cbuffer.hlsl | 11 +-
.../default_cbuffer_with_layout.hlsl | 3 +
llvm/docs/DirectX/DXILResources.rst | 10 +-
llvm/include/llvm/Frontend/HLSL/CBuffer.h | 6 +-
llvm/lib/Frontend/HLSL/CBuffer.cpp | 35 +-
llvm/lib/IR/Type.cpp | 4 +
llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 310 ++----------------
.../Target/DirectX/DirectXTargetMachine.cpp | 2 +-
llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp | 7 +-
llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 3 -
.../DirectX/CBufferAccess/array-typedgep.ll | 32 --
.../CodeGen/DirectX/CBufferAccess/arrays.ll | 124 ++-----
.../CodeGen/DirectX/CBufferAccess/float.ll | 25 --
.../DirectX/CBufferAccess/gep-ce-two-uses.ll | 22 +-
.../CodeGen/DirectX/CBufferAccess/memcpy.ll | 216 ------------
.../CodeGen/DirectX/CBufferAccess/scalars.ll | 73 +----
.../CodeGen/DirectX/CBufferAccess/vectors.ll | 119 -------
llvm/test/CodeGen/DirectX/llc-pipeline.ll | 2 +-
.../CodeGen/SPIRV/hlsl-resources/cbuffer.ll | 8 +-
.../SPIRV/hlsl-resources/cbuffer_unused.ll | 18 +-
35 files changed, 710 insertions(+), 1269 deletions(-)
delete mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
delete mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
delete mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
delete mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index fd73314c9f84c..a2e6f2f4f4817 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4869,6 +4869,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
ScaledIdx, false, SignedIndices, E->getExprLoc());
Addr = Address(EltPtr, OrigBaseElemTy, EltAlign);
+ } else if (getLangOpts().HLSL &&
+ E->getType().getAddressSpace() == LangAS::hlsl_constant) {
+ // This is an array inside of a cbuffer.
+ Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
+
+ SmallVector<llvm::Value *, 2> Indices;
+ Indices.push_back(EmitIdxAfterBase(/*Promote*/true));
+
+ CharUnits ElementSize = getContext().getTypeSizeInChars(E->getType());
+ CharUnits RowAlignedSize = ElementSize.alignTo(CharUnits::fromQuantity(16));
+
+ llvm::Type *EltTyToIndex = Addr.getElementType();
+ if (RowAlignedSize > ElementSize) {
+ llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding(
+ CGM, RowAlignedSize - ElementSize);
+ EltTyToIndex = llvm::StructType::get(
+ getLLVMContext(), {EltTyToIndex, Padding}, /*isPacked=*/true);
+ Indices.push_back(llvm::ConstantInt::get(Indices[0]->getType(), 0));
+ }
+
+ CharUnits EltAlign =
+ getArrayElementAlign(Addr.getAlignment(), Indices[0], RowAlignedSize);
+ llvm::Value *EltPtr =
+ emitArraySubscriptGEP(*this, EltTyToIndex, Addr.emitRawPointer(*this),
+ Indices, false, SignedIndices, E->getExprLoc());
+ Addr = Address(EltPtr, Addr.getElementType(), EltAlign);
} else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
// If this is A[i] where A is an array, the frontend will have decayed the
// base to be a ArrayToPointerDecay implicit cast. While correct, it is
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index eee397f1f3d19..bd7d30b10d4d0 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -2279,6 +2279,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
}
}
+ if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant)
+ if (CGM.getHLSLRuntime().emitBufferCopy(*this, DestPtr, SrcPtr, Ty))
+ return;
+
// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in anyway the storage of the first
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index ecab9336a9f82..bd86df9d68094 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#include "CGHLSLRuntime.h"
-#include "Address.h"
#include "CGDebugInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "HLSLBufferLayoutBuilder.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attrs.inc"
@@ -26,6 +26,7 @@
#include "clang/AST/Type.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/HLSL/RootSignatureMetadata.h"
@@ -43,6 +44,8 @@
#include <cstdint>
#include <optional>
+#define DEBUG_TYPE "cghlslruntime"
+
using namespace clang;
using namespace CodeGen;
using namespace clang::hlsl;
@@ -265,9 +268,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");
// Check if the target has a specific translation for this type first.
- if (llvm::Type *TargetTy =
+ if (llvm::Type *LayoutTy =
CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
- return TargetTy;
+ return LayoutTy;
llvm_unreachable("Generic handling of HLSL types is not supported.");
}
@@ -284,10 +287,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
// get the layout struct from constant buffer target type
llvm::Type *BufType = BufGV->getValueType();
- llvm::Type *BufLayoutType =
- cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
llvm::StructType *LayoutStruct = cast<llvm::StructType>(
- cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
+ cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));
// Start metadata list associating the buffer global variable with its
// constatns
@@ -326,6 +327,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
continue;
}
+ if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
+ ++ElemIt;
+
assert(ElemIt != LayoutStruct->element_end() &&
"number of elements in layout struct does not match");
llvm::Type *LayoutType = *ElemIt++;
@@ -423,12 +427,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
if (BufDecl->hasValidPackoffset())
fillPackoffsetLayout(BufDecl, Layout);
- llvm::TargetExtType *TargetTy =
- cast<llvm::TargetExtType>(convertHLSLSpecificType(
- ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
+ llvm::Type *LayoutTy = convertHLSLSpecificType(
+ ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr);
llvm::GlobalVariable *BufGV = new GlobalVariable(
- TargetTy, /*isConstant*/ false,
- GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
+ LayoutTy, /*isConstant*/ false,
+ GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
llvm::formatv("{0}{1}", BufDecl->getName(),
BufDecl->isCBuffer() ? ".cb" : ".tb"),
GlobalValue::NotThreadLocal);
@@ -454,7 +457,7 @@ void CGHLSLRuntime::addRootSignature(
SignatureDecl->getRootElements(), nullptr, M);
}
-llvm::TargetExtType *
+llvm::StructType *
CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
const auto Entry = LayoutTypes.find(StructType);
if (Entry != LayoutTypes.end())
@@ -463,7 +466,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
}
void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
- llvm::TargetExtType *LayoutTy) {
+ llvm::StructType *LayoutTy) {
assert(getHLSLBufferLayoutType(StructType) == nullptr &&
"layout type for this struct already exist");
LayoutTypes[StructType] = LayoutTy;
@@ -997,3 +1000,146 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
}
return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl);
}
+
+namespace {
+class HLSLBufferCopyEmitter {
+ CodeGenFunction &CGF;
+ Address DestPtr;
+ Address SrcPtr;
+ llvm::Type *LayoutTy = nullptr;
+
+ SmallVector<llvm::Value *> CurStoreIndices;
+ SmallVector<llvm::Value *> CurLoadIndices;
+
+ void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex,
+ llvm::ConstantInt *LoadIndex) {
+ CurStoreIndices.push_back(StoreIndex);
+ CurLoadIndices.push_back(LoadIndex);
+ auto RestoreIndices = llvm::make_scope_exit([&]() {
+ CurStoreIndices.pop_back();
+ CurLoadIndices.pop_back();
+ });
+
+ if (processArray(FieldTy))
+ return;
+ if (processBufferLayoutArray(FieldTy))
+ return;
+ if (processStruct(FieldTy))
+ return;
+
+ // We have a scalar or vector element - emit a copy.
+ CharUnits Align = CharUnits::fromQuantity(
+ CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
+ Address SrcGEP = RawAddress(
+ CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
+ CurLoadIndices, "cbuf.src"),
+ FieldTy, Align, SrcPtr.isKnownNonNull());
+ Address DestGEP = CGF.Builder.CreateInBoundsGEP(
+ DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
+ llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
+ CGF.Builder.CreateStore(Load, DestGEP);
+ }
+
+ bool processArray(llvm::Type *FieldTy) {
+ auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
+ if (!AT)
+ return false;
+
+ // If we have an array then there isn't any padding
+ // between elements. We just need to copy each element over.
+ for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
+ emitCopyAtIndices(AT->getElementType(),
+ llvm::ConstantInt::get(CGF.SizeTy, I),
+ llvm::ConstantInt::get(CGF.SizeTy, I));
+ return true;
+ }
+
+ bool processBufferLayoutArray(llvm::Type *FieldTy) {
+ auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+ if (!ST || ST->getNumElements() != 2)
+ return false;
+
+ auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
+ if (!PaddedEltsTy)
+ return false;
+
+ auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
+ if (!PaddedTy || PaddedTy->getNumElements() != 2)
+ return false;
+
+ if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
+ PaddedTy->getElementType(1)))
+ return false;
+
+ llvm::Type *ElementTy = ST->getElementType(1);
+ if (PaddedTy->getElementType(0) != ElementTy)
+ return false;
+
+ // All but the last of the logical array elements are in the padded array.
+ unsigned NumElts = PaddedEltsTy->getNumElements() + 1;
+
+ // Add an extra indirection to the load for the struct and walk the
+ // array prefix.
+ CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ for (unsigned I = 0; I < NumElts - 1; ++I) {
+ // We need to copy the element itself, without the padding.
+ CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I));
+ emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ CurLoadIndices.pop_back();
+ }
+ CurLoadIndices.pop_back();
+
+ // Now copy the last element.
+ emitCopyAtIndices(ElementTy,
+ llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1),
+ llvm::ConstantInt::get(CGF.Int32Ty, 1));
+
+ return true;
+ }
+
+ bool processStruct(llvm::Type *FieldTy) {
+ auto *ST = dyn_cast<llvm::StructType>(FieldTy);
+ if (!ST)
+ return false;
+
+ unsigned Skipped = 0;
+ for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
+ llvm::Type *ElementTy = ST->getElementType(I);
+ if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy))
+ ++Skipped;
+ else
+ emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.Int32Ty, I),
+ llvm::ConstantInt::get(CGF.Int32Ty, I + Skipped));
+ }
+ return true;
+ }
+
+public:
+ HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
+ : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}
+
+ bool emitCopy(QualType CType) {
+ LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
+
+ LLVM_DEBUG({
+ dbgs() << "Emitting copy of ";
+ LayoutTy->print(dbgs());
+ dbgs() << "\n";
+ });
+
+ // If we don't have an aggregate, we can just fall back to normal memcpy.
+ if (!LayoutTy->isAggregateType())
+ return false;
+
+ emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0),
+ llvm::ConstantInt::get(CGF.SizeTy, 0));
+ return true;
+ }
+};
+} // namespace
+
+bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr,
+ Address SrcPtr, QualType CType) {
+ return HLSLBufferCopyEmitter(CGF, DestPtr, SrcPtr).emitCopy(CType);
+}
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 103b4a98f6c26..97c7d0619950a 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -15,20 +15,19 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsDirectX.h"
-#include "llvm/IR/IntrinsicsSPIRV.h"
-
+#include "Address.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/HLSLRuntime.h"
-
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
#include <optional>
#include <vector>
@@ -187,16 +186,18 @@ class CGHLSLRuntime {
llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);
- llvm::TargetExtType *
- getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
+ llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
- llvm::TargetExtType *LayoutTy);
+ llvm::StructType *LayoutTy);
void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);
std::optional<LValue>
emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E,
CodeGenFunction &CGF);
+ bool emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr,
+ QualType CType);
+
private:
void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
llvm::GlobalVariable *BufGV);
@@ -204,7 +205,7 @@ class CGHLSLRuntime {
llvm::GlobalVariable *GV);
llvm::Triple::ArchType getArch();
- llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
+ llvm::DenseMap<const clang::RecordType *, llvm::StructType *> LayoutTypes;
};
} // namespace CodeGen
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
index 838903cdcd1ee..99b5602b18bff 100644
--- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp
@@ -9,6 +9,7 @@
#include "HLSLBufferLayoutBuilder.h"
#include "CGHLSLRuntime.h"
#include "CodeGenModule.h"
+#include "TargetInfo.h"
#include "clang/AST/Type.h"
#include <climits>
@@ -19,71 +20,21 @@
using namespace clang;
using namespace clang::CodeGen;
-using llvm::hlsl::CBufferRowSizeInBytes;
-namespace {
-
-// Creates a new array type with the same dimentions but with the new
-// element type.
-static llvm::Type *
-createArrayWithNewElementType(CodeGenModule &CGM,
- const ConstantArrayType *ArrayType,
- llvm::Type *NewElemType) {
- const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual();
- if (ArrayElemType->isConstantArrayType())
- NewElemType = createArrayWithNewElementType(
- CGM, cast<const ConstantArrayType>(ArrayElemType), NewElemType);
- return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize());
-}
-
-// Returns the size of a scalar or vector in bytes
-static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) {
- assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy());
- if (Ty->isVectorTy()) {
- llvm::FixedVectorType *FVT = cast<llvm::FixedVectorType>(Ty);
- return FVT->getNumElements() *
- (FVT->getElementType()->getScalarSizeInBits() / 8);
- }
- return Ty->getScalarSizeInBits() / 8;
-}
-
-} // namespace
+static const CharUnits CBufferRowSize =
+ CharUnits::fromQuantity(llvm::hlsl::CBufferRowSizeInBytes);
namespace clang {
namespace CodeGen {
-// Creates a layout type for given struct or class with HLSL constant buffer
-// layout taking into account PackOffsets, if provided.
-// Previously created layout types are cached by CGHLSLRuntime.
-//
-// The function iterates over all fields of the record type (including base
-// classes) and calls layoutField to converts each field to its corresponding
-// LLVM type and to calculate its HLSL constant buffer layout. Any embedded
-// structs (or arrays of structs) are converted to target layout types as well.
-//
-// When PackOffsets are specified the elements will be placed based on the
-// user-specified offsets. Not all elements must have a packoffset/register(c#)
-// annotation though. For those that don't, the PackOffsets array will contain
-// -1 value instead. These elements must be placed at the end of the layout
-// after all of the elements with specific offset.
-llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
+llvm::StructType *HLSLBufferLayoutBuilder::layOutStruct(
const RecordType *RT, const llvm::SmallVector<int32_t> *PackOffsets) {
// check if we already have the layout type for this struct
- if (llvm::TargetExtType *Ty =
- CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT))
+ // TODO: Do we need to check for matching PackOffsets?
+ if (llvm::StructType *Ty = CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT))
return Ty;
- SmallVector<unsigned> Layout;
- SmallVector<llvm::Type *> LayoutElements;
- unsigned Index = 0; // packoffset index
- unsigned EndOffset = 0;
-
- SmallVector<std::pair<const FieldDecl *, unsigned>> DelayLayoutFields;
-
- // reserve first spot in the layout vector for buffer size
- Layout.push_back(0);
-
// iterate over all fields of the record, including fields on base classes
llvm::SmallVector<CXXRecordDecl *> RecordDecls;
RecordDecls.push_back(RT->castAsCXXRecordDecl());
@@ -94,179 +45,102 @@ llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType(
RecordDecls.push_back(D->bases_begin()->getType()->castAsCXXRecordDecl());
}
- unsigned FieldOffset;
- llvm::Type *FieldType;
+ SmallVector<llvm::Type *> Layout;
+ SmallVector<const FieldDecl *> DelayLayoutFields;
+ CharUnits CurrentOffset = CharUnits::Zero();
+ auto LayOutField = [&](QualType FieldType) {
+ llvm::Type *LayoutType = layOutType(FieldType);
+
+ const llvm::DataLayout &DL = CGM.getDataLayout();
+ CharUnits Size =
+ CharUnits::fromQuantity(DL.getTypeSizeInBits(LayoutType) / 8);
+ CharUnits Align = CharUnits::fromQuantity(DL.getABITypeAlign(LayoutType));
+
+ if (LayoutType->isAggregateType() ||
+ (CurrentOffset % CBufferRowSize) + Size > CBufferRowSize)
+ Align = Align.alignTo(CBufferRowSize);
+
+ CharUnits NextOffset = CurrentOffset.alignTo(Align);
+ if (NextOffset > CurrentOffset) {
+ llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding(
+ CGM, NextOffset - CurrentOffset);
+ Layout.emplace_back(Padding);
+ CurrentOffset = NextOffset;
+ }
+ Layout.emplace_back(LayoutType);
+ CurrentOffset += Size;
+ };
+ unsigned PackOffsetIndex = 0;
while (!RecordDecls.empty()) {
const CXXRecordDecl *RD = RecordDecls.pop_back_val();
for (const auto *FD : RD->fields()) {
- assert((!PackOffsets || Index < PackOffsets->size()) &&
+ assert((!PackOffsets || PackOffsetIndex < PackOffsets->size()) &&
"number of elements in layout struct does not match number of "
"packoffset annotations");
// No PackOffset info at all, or have a valid packoffset/register(c#)
// annotations value -> layout the field.
- const int PO = PackOffsets ? (*PackOffsets)[Index++] : -1;
- if (!PackOffsets || PO != -1) {
- if (!layoutField(FD, EndOffset, FieldOffset, FieldType, PO))
- return nullptr;
- Layout.push_back(FieldOffset);
- LayoutElements.push_back(FieldType);
+ const int PO = PackOffsets ? (*PackOffsets)[PackOffsetIndex++] : -1;
+ if (PO != -1) {
+ LayOutField(FD->getType());
continue;
}
// Have PackOffset info, but there is no packoffset/register(cX)
// annotation on this field. Delay the layout until after all of the
// other elements with packoffsets/register(cX) are processed.
- DelayLayoutFields.emplace_back(FD, LayoutElements.size());
- // reserve space for this field in the layout vector and elements list
- Layout.push_back(UINT_MAX);
- LayoutElements.push_back(nullptr);
+ DelayLayoutFields.emplace_back(FD);
}
}
// process delayed layouts
- for (auto I : DelayLayoutFields) {
- const FieldDecl *FD = I.first;
- const unsigned IndexInLayoutElements = I.second;
- // the first item in layout vector is size, so we need to offset the index
- // by 1
- const unsigned IndexInLayout = IndexInLayoutElements + 1;
- assert(Layout[IndexInLayout] == UINT_MAX &&
- LayoutElements[IndexInLayoutElements] == nullptr);
-
- if (!layoutField(FD, EndOffset, FieldOffset, FieldType))
- return nullptr;
- Layout[IndexInLayout] = FieldOffset;
- LayoutElements[IndexInLayoutElements] = FieldType;
- }
+ for (const FieldDecl *FD : DelayLayoutFields)
+ LayOutField(FD->getType());
- // set the size of the buffer
- Layout[0] = EndOffset;
-
- // create the layout struct type; anonymous struct have empty name but
+ // Create the layout struct type; anonymous structs have empty name but
// non-empty qualified name
const auto *Decl = RT->castAsCXXRecordDecl();
std::string Name =
Decl->getName().empty() ? "anon" : Decl->getQualifiedNameAsString();
- llvm::StructType *StructTy =
- llvm::StructType::create(LayoutElements, Name, true);
- // create target layout type
- llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get(
- CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout);
- if (NewLayoutTy)
- CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewLayoutTy);
- return NewLayoutTy;
+ llvm::StructType *NewTy = llvm::StructType::create(Layout, Name,
+ /*isPacked=*/true);
+ CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewTy);
+ return NewTy;
}
-// The function converts a single field of HLSL Buffer to its corresponding
-// LLVM type and calculates it's layout. Any embedded structs (or
-// arrays of structs) are converted to target layout types as well.
-// The converted type is set to the FieldType parameter, the element
-// offset is set to the FieldOffset parameter. The EndOffset (=size of the
-// buffer) is also updated accordingly to the offset just after the placed
-// element, unless the incoming EndOffset already larger (may happen in case
-// of unsorted packoffset annotations).
-// Returns true if the conversion was successful.
-// The packoffset parameter contains the field's layout offset provided by the
-// user or -1 if there was no packoffset (or register(cX)) annotation.
-bool HLSLBufferLayoutBuilder::layoutField(const FieldDecl *FD,
- unsigned &EndOffset,
- unsigned &FieldOffset,
- llvm::Type *&FieldType,
- int Packoffset) {
-
- // Size of element; for arrays this is a size of a single element in the
- // array. Total array size of calculated as (ArrayCount-1) * ArrayStride +
- // ElemSize.
- unsigned ElemSize = 0;
- unsigned ElemOffset = 0;
- unsigned ArrayCount = 1;
- unsigned ArrayStride = 0;
-
- unsigned NextRowOffset = llvm::alignTo(EndOffset, CBufferRowSizeInBytes);
-
- llvm::Type *ElemLayoutTy = nullptr;
- QualType FieldTy = FD->getType();
-
- if (FieldTy->isConstantArrayType()) {
- // Unwrap array to find the element type and get combined array size.
- QualType Ty = FieldTy;
- while (Ty->isConstantArrayType()) {
- auto *ArrayTy = CGM.getContext().getAsConstantArrayType(Ty);
- ArrayCount *= ArrayTy->getSExtSize();
- Ty = ArrayTy->getElementType();
- }
- // For array of structures, create a new array with a layout type
- // instead of the structure type.
- if (Ty->isStructureOrClassType()) {
- llvm::Type *NewTy = cast<llvm::TargetExtType>(
- createLayoutType(Ty->getAsCanonical<RecordType>()));
- if (!NewTy)
- return false;
- assert(isa<llvm::TargetExtType>(NewTy) && "expected target type");
- ElemSize = cast<llvm::TargetExtType>(NewTy)->getIntParameter(0);
- ElemLayoutTy = createArrayWithNewElementType(
- CGM, cast<ConstantArrayType>(FieldTy.getTypePtr()), NewTy);
- } else {
- // Array of vectors or scalars
- ElemSize =
- getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty));
- ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
- }
- ArrayStride = llvm::alignTo(ElemSize, CBufferRowSizeInBytes);
- ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
-
- } else if (FieldTy->isStructureOrClassType()) {
- // Create a layout type for the structure
- ElemLayoutTy = createLayoutType(
- cast<RecordType>(FieldTy->getAsCanonical<RecordType>()));
- if (!ElemLayoutTy)
- return false;
- assert(isa<llvm::TargetExtType>(ElemLayoutTy) && "expected target type");
- ElemSize = cast<llvm::TargetExtType>(ElemLayoutTy)->getIntParameter(0);
- ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset;
-
- } else {
- // scalar or vector - find element size and alignment
- unsigned Align = 0;
- ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy);
- if (ElemLayoutTy->isVectorTy()) {
- // align vectors by sub element size
- const llvm::FixedVectorType *FVT =
- cast<llvm::FixedVectorType>(ElemLayoutTy);
- unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8;
- ElemSize = FVT->getNumElements() * SubElemSize;
- Align = SubElemSize;
- } else {
- assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy());
- ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8;
- Align = ElemSize;
- }
+llvm::Type *HLSLBufferLayoutBuilder::layOutArray(const ConstantArrayType *AT) {
+ llvm::Type *EltTy = layOutType(AT->getElementType());
+ uint64_t Count = AT->getZExtSize();
+
+ CharUnits EltSize =
+ CharUnits::fromQuantity(CGM.getDataLayout().getTypeSizeInBits(EltTy) / 8);
+ CharUnits Padding = EltSize.alignTo(CBufferRowSize) - EltSize;
+
+ // If we don't have any padding between elements then we just need the array
+ // itself.
+ if (Count < 2 || Padding.isZero())
+ return llvm::ArrayType::get(EltTy, Count);
+
+ llvm::LLVMContext &Context = CGM.getLLVMContext();
+ llvm::Type *PaddingTy =
+ CGM.getTargetCodeGenInfo().getHLSLPadding(CGM, Padding);
+ auto *PaddedEltTy =
+ llvm::StructType::get(Context, {EltTy, PaddingTy}, /*isPacked=*/true);
+ return llvm::StructType::get(
+ Context, {llvm::ArrayType::get(PaddedEltTy, Count - 1), EltTy},
+ /*isPacked=*/true);
+}
- // calculate or get element offset for the vector or scalar
- if (Packoffset != -1) {
- ElemOffset = Packoffset;
- } else {
- ElemOffset = llvm::alignTo(EndOffset, Align);
- // if the element does not fit, move it to the next row
- if (ElemOffset + ElemSize > NextRowOffset)
- ElemOffset = NextRowOffset;
- }
- }
+llvm::Type *HLSLBufferLayoutBuilder::layOutType(QualType Ty) {
+ if (const auto *AT = CGM.getContext().getAsConstantArrayType(Ty))
+ return layOutArray(AT);
- // Update end offset of the layout; do not update it if the EndOffset
- // is already bigger than the new value (which may happen with unordered
- // packoffset annotations)
- unsigned NewEndOffset =
- ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize;
- EndOffset = std::max<unsigned>(EndOffset, NewEndOffset);
+ if (Ty->isStructureOrClassType())
+ return layOutStruct(Ty->getAsCanonical<RecordType>());
- // add the layout element and offset to the lists
- FieldOffset = ElemOffset;
- FieldType = ElemLayoutTy;
- return true;
+ return CGM.getTypes().ConvertTypeForMem(Ty);
}
} // namespace CodeGen
diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
index 61240b280cfcb..0515b469f8b03 100644
--- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
+++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h
@@ -6,13 +6,11 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/AST/TypeBase.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DerivedTypes.h"
namespace clang {
-class RecordType;
-class FieldDecl;
-
namespace CodeGen {
class CodeGenModule;
@@ -24,23 +22,36 @@ class CodeGenModule;
class HLSLBufferLayoutBuilder {
private:
CodeGenModule &CGM;
- llvm::StringRef LayoutTypeName;
public:
- HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName)
- : CGM(CGM), LayoutTypeName(LayoutTypeName) {}
-
- // Returns LLVM target extension type with the name LayoutTypeName
- // for given structure type and layout data. The first number in
- // the Layout is the size followed by offsets for each struct element.
- llvm::TargetExtType *
- createLayoutType(const RecordType *StructType,
- const llvm::SmallVector<int32_t> *Packoffsets = nullptr);
-
-private:
- bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset,
- unsigned &FieldOffset, llvm::Type *&FieldType,
- int Packoffset = -1);
+ HLSLBufferLayoutBuilder(CodeGenModule &CGM) : CGM(CGM) {}
+
+ /// Lays out a struct type following HLSL buffer rules and considering
+ /// PackOffsets, if provided. Previously created layout structs are cached by
+ /// CGHLSLRuntime.
+ ///
+ /// The function iterates over all fields of the record type (including base
+ /// classes) and lays out each field, converting it to its corresponding
+ /// LLVM type and calculating its HLSL constant buffer layout. Any embedded
+ /// structs (or arrays of structs) are converted to layout types as well.
+ ///
+ /// When PackOffsets are specified the elements will be placed based on the
+ /// user-specified offsets. Not all elements must have a
+ /// packoffset/register(c#) annotation though. For those that don't, the
+ /// PackOffsets array will contain -1 value instead. These elements must be
+ /// placed at the end of the layout after all of the elements with specific
+ /// offset.
+ llvm::StructType *
+ layOutStruct(const RecordType *StructType,
+ const llvm::SmallVector<int32_t> *Packoffsets = nullptr);
+
+ /// Lays out an array type following HLSL buffer rules.
+ llvm::Type *
+ layOutArray(const ConstantArrayType *AT);
+
+ /// Lays out a type following HLSL buffer rules. Arrays and structures will be
+ /// padded appropriately and nested objects will be converted as appropriate.
+ llvm::Type *layOutType(QualType Type);
};
} // namespace CodeGen
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index d0edae1295094..8b59fde4b4120 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -448,6 +448,20 @@ class TargetCodeGenInfo {
return nullptr;
}
+ virtual llvm::Type *
+ getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const {
+ return llvm::ArrayType::get(llvm::Type::getInt8Ty(CGM.getLLVMContext()),
+ NumBytes.getQuantity());
+ }
+
+ virtual bool isHLSLPadding(llvm::Type *Ty) const {
+ // TODO: Do we actually want to default these functions like this?
+ if (auto *AT = dyn_cast<llvm::ArrayType>(Ty))
+ if (AT->getElementType() == llvm::Type::getInt8Ty(Ty->getContext()))
+ return true;
+ return false;
+ }
+
// Set the Branch Protection Attributes of the Function accordingly to the
// BPI. Remove attributes that contradict with current BPI.
static void
diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp
index b4cebb9a32aca..6f6eb8beed5b9 100644
--- a/clang/lib/CodeGen/Targets/DirectX.cpp
+++ b/clang/lib/CodeGen/Targets/DirectX.cpp
@@ -32,6 +32,19 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo {
llvm::Type *
getHLSLType(CodeGenModule &CGM, const Type *T,
const SmallVector<int32_t> *Packoffsets = nullptr) const override;
+
+ llvm::Type *getHLSLPadding(CodeGenModule &CGM,
+ CharUnits NumBytes) const override {
+ unsigned Size = NumBytes.getQuantity();
+ return llvm::TargetExtType::get(CGM.getLLVMContext(), "dx.Padding", {},
+ {Size});
+ }
+
+ bool isHLSLPadding(llvm::Type *Ty) const override {
+ if (auto *TET = dyn_cast<llvm::TargetExtType>(Ty))
+ return TET->getName() == "dx.Padding";
+ return false;
+ }
};
llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
@@ -75,10 +88,9 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(
if (ContainedTy.isNull() || !ContainedTy->isStructureType())
return nullptr;
- llvm::Type *BufferLayoutTy =
- HLSLBufferLayoutBuilder(CGM, "dx.Layout")
- .createLayoutType(ContainedTy->castAsCanonical<RecordType>(),
- Packoffsets);
+ llvm::StructType *BufferLayoutTy =
+ HLSLBufferLayoutBuilder(CGM).layOutStruct(
+ ContainedTy->getAsCanonical<RecordType>(), Packoffsets);
if (!BufferLayoutTy)
return nullptr;
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 80e096ecf5ae9..a523cef801487 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -56,6 +56,20 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
llvm::Type *
getHLSLType(CodeGenModule &CGM, const Type *Ty,
const SmallVector<int32_t> *Packoffsets = nullptr) const override;
+
+ llvm::Type *
+ getHLSLPadding(CodeGenModule &CGM, CharUnits NumBytes) const override {
+ unsigned Size = NumBytes.getQuantity();
+ return llvm::TargetExtType::get(CGM.getLLVMContext(), "spirv.Padding", {},
+ {Size});
+ }
+
+ bool isHLSLPadding(llvm::Type *Ty) const override {
+ if (auto *TET = dyn_cast<llvm::TargetExtType>(Ty))
+ return TET->getName() == "spirv.Padding";
+ return false;
+ }
+
llvm::Type *getSPIRVImageTypeFromHLSLResource(
const HLSLAttributedResourceType::Attributes &attributes,
QualType SampledType, CodeGenModule &CGM) const;
@@ -533,10 +547,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(
if (ContainedTy.isNull() || !ContainedTy->isStructureType())
return nullptr;
- llvm::Type *BufferLayoutTy =
- HLSLBufferLayoutBuilder(CGM, "spirv.Layout")
- .createLayoutType(ContainedTy->castAsCanonical<RecordType>(),
- Packoffsets);
+ llvm::StructType *BufferLayoutTy =
+ HLSLBufferLayoutBuilder(CGM).layOutStruct(
+ ContainedTy->getAsCanonical<RecordType>(), Packoffsets);
uint32_t StorageClass = /* Uniform storage class */ 2;
return llvm::TargetExtType::get(Ctx, "spirv.VulkanBuffer", {BufferLayoutTy},
{StorageClass, false});
diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
index aaa486eff10b7..0a71801a860d2 100644
--- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
+++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
@@ -5,18 +5,19 @@ struct S {
float f;
};
-// CHECK: [[CBLayout:%.*]] = type <{ [2 x float], [2 x <4 x i32>], [2 x [2 x i32]], [1 x target("dx.Layout", %S, 8, 0, 4)] }>
-// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", [[CBLayout]], 136, 0, 32, 64, 128))
-// CHECK: @c1 = external hidden addrspace(2) global [2 x float], align 4
+// CHECK: [[CBLayout:%.*]] = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), [2 x <4 x i32>], <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, target("dx.Padding", 12), <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }> }>
+
+// CHECK: @CBArrays.cb = global target("dx.CBuffer", [[CBLayout]])
+// CHECK: @c1 = external hidden addrspace(2) global <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
// CHECK: @c2 = external hidden addrspace(2) global [2 x <4 x i32>], align 16
-// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x i32]], align 4
-// CHECK: @c4 = external hidden addrspace(2) global [1 x target("dx.Layout", %S, 8, 0, 4)], align 1
+// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, align 4
+// CHECK: @c4 = external hidden addrspace(2) global <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, align 1
cbuffer CBArrays : register(b0) {
float c1[2];
int4 c2[2];
int c3[2][2];
- S c4[1];
+ S c4[2];
}
// CHECK-LABEL: define hidden void {{.*}}arr_assign1
@@ -140,40 +141,71 @@ void arr_assign7() {
// CHECK-LABEL: define hidden void {{.*}}arr_assign8
// CHECK: [[C:%.*]] = alloca [2 x float], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c1, i32 8, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0
+// CHECK-NEXT: [[L0:%.*]] = load float, ptr addrspace(2) @c1, align 4
+// CHECK-NEXT: store float [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, ptr addrspace(2) @c1, i32 0, i32 1), align 4
+// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
// CHECK-NEXT: ret void
void arr_assign8() {
- float C[2] = {1.0, 2.0};
+ float C[2];
C = c1;
}
+// TODO: A memcpy would actually be valid here, since everything is aligned on
+// 16 byte boundaries.
+//
// CHECK-LABEL: define hidden void {{.*}}arr_assign9
// CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[C]], ptr align 16 {{.*}}, i32 32, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0
+// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16
+// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16
+// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16
// CHECK-NEXT: ret void
void arr_assign9() {
- int4 C[2] = {1,2,3,4,5,6,7,8};
+ int4 C[2];
C = c2;
}
// CHECK-LABEL: define hidden void {{.*}}arr_assign10
// CHECK: [[C:%.*]] = alloca [2 x [2 x i32]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 16, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c3, i32 16, i1 false)
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c3, align 4
+// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 0, i32 0, i32 1), align 4
+// CHECK-NEXT: store i32 [[L1]], ptr [[V1]], align 4
+// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 0, i32 0), align 4
+// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 1
+// CHECK-NEXT: [[L3:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 1), align 4
+// CHECK-NEXT: store i32 [[L3]], ptr [[V3]], align 4
// CHECK-NEXT: ret void
void arr_assign10() {
- int C[2][2] = {1,2,3,4};
+ int C[2][2];
C = c3;
}
// CHECK-LABEL: define hidden void {{.*}}arr_assign11
-// CHECK: [[C:%.*]] = alloca [1 x %struct.S], align 1
-// CHECK: call void @llvm.memcpy.p0.p2.i32(ptr align 1 [[C]], ptr addrspace(2) align 1 @c4, i32 8, i1 false)
+// CHECK: [[C:%.*]] = alloca [2 x %struct.S], align 1
+// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c4, align 4
+// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4
+// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 0, i32 0, i32 1), align 4
+// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4
+// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 0), align 4
+// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 1
+// CHECK-NEXT: [[L3:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 1), align 4
+// CHECK-NEXT: store float [[L3]], ptr [[V3]], align 4
// CHECK-NEXT: ret void
void arr_assign11() {
- S s = {1, 2.0};
- S C[1] = {s};
+ S C[2];
C = c4;
}
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index b36682e065b3a..df82e8201ba55 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -37,9 +37,9 @@ void main(unsigned GI : SV_GroupIndex) {}
// INLINE-NEXT: alloca
// INLINE-NEXT: store i32 12
// INLINE-NEXT: store i32 13
-// INLINE-NEXT: %[[HANDLE:.*]] = call target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0))
-// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_$Globalss_4_0tt"(i32 0, i32 0, i32 1, i32 0, i1 false)
-// INLINE-NEXT: store target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) %[[HANDLE]], ptr @"$Globals.cb", align 4
+// INLINE-NEXT: %[[HANDLE:.*]] = call target("dx.CBuffer", %"__cblayout_$Globals")
+// INLINE-NEXT-SAME: @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_$Globalsst"(i32 0, i32 0, i32 1, i32 0, i1 false, ptr @"$Globals.str")
+// INLINE-NEXT: store target("dx.CBuffer", %"__cblayout_$Globals") %[[HANDLE]], ptr @"$Globals.cb", align 4
// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// INLINE-NEXT: store i32 %
// INLINE-NEXT: store i32 0
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
index 8dcff5dad9d13..ab37ae0bc4e68 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl
@@ -1,37 +1,121 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
-// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }>
-// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }>
-// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
-// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8),
-// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)],
-// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }>
+// CHECK: %__cblayout_CBScalars = type <{
+// CHECK-SAME: float, target("dx.Padding", 4), double,
+// CHECK-SAME: half, target("dx.Padding", 6), i64,
+// CHECK-SAME: i32, i16, target("dx.Padding", 2), i32, target("dx.Padding", 4),
+// CHECK-SAME: i64
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBVectors = type <{
+// CHECK-SAME: <3 x float>, target("dx.Padding", 4),
+// CHECK-SAME: <3 x double>, <2 x half>, target("dx.Padding", 4),
+// CHECK-SAME: <3 x i64>, target("dx.Padding", 8),
+// CHECK-SAME: <4 x i32>,
+// CHECK-SAME: <3 x i16>, target("dx.Padding", 10),
+// CHECK-SAME: <3 x i64>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBArrays = type <{
+// CHECK-SAME: <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+// CHECK-SAME: <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [1 x <{
+// CHECK-SAME: <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>],
+// CHECK-SAME: <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>
+// CHECK-SAME: }>, target("dx.Padding", 14),
+// CHECK-SAME: <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+// CHECK-SAME: [2 x [3 x [4 x <4 x i32>]]]
+// CHECK-SAME: [1 x i16], target("dx.Padding", 14),
+// CHECK-SAME: <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBStructs = type <{
+// CHECK-SAME: %A, target("dx.Padding", 8),
+
+// TODO: We should have target("dx.Padding", 2) padding after %B, but we don't correctly handle
+// 2- and 3-element vectors inside structs yet because of DataLayout rules.
+// CHECK-SAME: %B,
+
+// CHECK-SAME: %C, target("dx.Padding", 8),
+// CHECK-SAME: <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, target("dx.Padding", 8),
+// CHECK-SAME: %__cblayout_D, half,
+// CHECK-SAME: <3 x i16>
+// CHECK-SAME: }>
// CHECK: %A = type <{ <2 x float> }>
// CHECK: %B = type <{ <2 x float>, <3 x i16> }>
-// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }>
-// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }>
+// CHECK: %C = type <{ i32, target("dx.Padding", 12), %A }>
+
+// CHECK: %__cblayout_D = type <{
+// CHECK-SAME: <{ [1 x <{
+// CHECK-SAME: <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }>, target("dx.Padding", 2)
+// CHECK-SAME: }>],
+// CHECK-SAME: <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }> }>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CBClasses = type <{
+// CHECK-SAME: %K, target("dx.Padding", 12),
+// CHECK-SAME: %L, target("dx.Padding", 8),
+// CHECK-SAME: %M, target("dx.Padding", 12),
+// CHECK-SAME: <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>
+// CHECK-SAME: }>
-// CHECK: %__cblayout_CBClasses = type <{ target("dx.Layout", %K, 4, 0), target("dx.Layout", %L, 8, 0, 4),
-// CHECK-SAME: target("dx.Layout", %M, 68, 0), [10 x target("dx.Layout", %K, 4, 0)] }>
// CHECK: %K = type <{ float }>
// CHECK: %L = type <{ float, float }>
-// CHECK: %M = type <{ [5 x target("dx.Layout", %K, 4, 0)] }>
-
-// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float,
-// CHECK-SAME: target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }>
+// CHECK: %M = type <{ <{ [4 x <{ %K, target("dx.Padding", 12) }>], %K }> }>
+
+// CHECK: %__cblayout_CBMix = type <{
+// CHECK-SAME: <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, float, target("dx.Padding", 4),
+// CHECK-SAME: <{ [2 x <{
+// CHECK-SAME: <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>],
+// CHECK-SAME: <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>
+// CHECK-SAME: }>, float, target("dx.Padding", 4),
+// CHECK-SAME: %anon, target("dx.Padding", 4), double,
+// CHECK-SAME: %anon.0, float, target("dx.Padding", 4),
+// CHECK-SAME: <1 x double>, i16
+// CHECK-SAME: }>
// CHECK: %Test = type <{ float, float }>
// CHECK: %anon = type <{ float }>
// CHECK: %anon.0 = type <{ <2 x i32> }>
-// CHECK: %__cblayout_CB_A = type <{ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }>
-// CHECK: %__cblayout_CB_B = type <{ [3 x <3 x double>], <3 x half> }>
-// CHECK: %__cblayout_CB_C = type <{ i32, target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90), half, target("dx.Layout", %G, 258, 0, 48, 64, 256), double }>
-
-// CHECK: %F = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }>
-// CHECK: %G = type <{ target("dx.Layout", %E, 36, 0, 8, 16, 20, 22, 24, 32), [1 x float], [2 x target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)], half }>
-// CHECK: %E = type <{ float, double, float, half, i16, i64, i32 }>
+// CHECK: %__cblayout_CB_A = type <{
+// CHECK-SAME: <{ [1 x <{ double, target("dx.Padding", 8) }>], double }>, target("dx.Padding", 8),
+// CHECK-SAME: <{ [2 x <{ <3 x float>, target("dx.Padding", 4) }>], <3 x float> }>, float,
+// CHECK-SAME: <{ [2 x <{ double, target("dx.Padding", 8) }>], double }>, half, target("dx.Padding", 6),
+// CHECK-SAME: [1 x <2 x double>],
+// CHECK-SAME: float, target("dx.Padding", 12),
+// CHECK-SAME: <{ [1 x <{ <3 x half>, target("dx.Padding", 10) }>], <3 x half> }>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CB_B = type <{
+// CHECK-SAME: <{ [2 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %__cblayout_CB_C = type <{
+// CHECK-SAME: i32, target("dx.Padding", 12),
+// CHECK-SAME: %F,
+// CHECK-SAME: half, target("dx.Padding", 14),
+// CHECK-SAME: %G, target("dx.Padding", 6), double
+// CHECK-SAME: }>
+
+// CHECK: %F = type <{
+// CHECK-SAME: double, target("dx.Padding", 8),
+// CHECK-SAME: <3 x float>, float,
+// CHECK-SAME: <3 x double>, half, target("dx.Padding", 6),
+// CHECK-SAME: <2 x double>,
+// CHECK-SAME: float, <3 x half>, <3 x half>
+// CHECK-SAME: }>
+
+// CHECK: %G = type <{
+// CHECK-SAME: %E, target("dx.Padding", 12),
+// CHECK-SAME: [1 x float], target("dx.Padding", 12),
+// CHECK-SAME: [2 x %F],
+// CHECK-SAME: half
+// CHECK-SAME: }>
+
+// CHECK: %E = type <{ float, target("dx.Padding", 4), double, float, half, i16, i64, i32 }>
cbuffer CBScalars : register(b1, space5) {
float a1;
@@ -44,8 +128,7 @@ cbuffer CBScalars : register(b1, space5) {
int64_t a8;
}
-// CHECK: @CBScalars.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars,
-// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48))
+// CHECK: @CBScalars.cb = global target("dx.CBuffer", %__cblayout_CBScalars)
// CHECK: @a1 = external hidden addrspace(2) global float, align 4
// CHECK: @a2 = external hidden addrspace(2) global double, align 8
// CHECK: @a3 = external hidden addrspace(2) global half, align 2
@@ -67,8 +150,7 @@ cbuffer CBVectors {
// FIXME: add a bool vectors after llvm-project/llvm#91639 is added
}
-// CHECK: @CBVectors.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors,
-// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112))
+// CHECK: @CBVectors.cb = global target("dx.CBuffer", %__cblayout_CBVectors)
// CHECK: @b1 = external hidden addrspace(2) global <3 x float>, align 16
// CHECK: @b2 = external hidden addrspace(2) global <3 x double>, align 32
// CHECK: @b3 = external hidden addrspace(2) global <2 x half>, align 4
@@ -89,16 +171,15 @@ cbuffer CBArrays : register(b2) {
bool c8[4];
}
-// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays,
-// CHECK-SAME: 708, 0, 48, 112, 176, 224, 608, 624, 656))
-// CHECK: @c1 = external hidden addrspace(2) global [3 x float], align 4
-// CHECK: @c2 = external hidden addrspace(2) global [2 x <3 x double>], align 32
-// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x half]], align 2
-// CHECK: @c4 = external hidden addrspace(2) global [3 x i64], align 8
+// CHECK: @CBArrays.cb = global target("dx.CBuffer", %__cblayout_CBArrays)
+// CHECK: @c1 = external hidden addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
+// CHECK: @c2 = external hidden addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32
+// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], <{ [1 x <{ half, target("dx.Padding", 14) }>], half }> }>, align 2
+// CHECK: @c4 = external hidden addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
// CHECK: @c5 = external hidden addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
// CHECK: @c6 = external hidden addrspace(2) global [1 x i16], align 2
-// CHECK: @c7 = external hidden addrspace(2) global [2 x i64], align 8
-// CHECK: @c8 = external hidden addrspace(2) global [4 x i32], align 4
+// CHECK: @c7 = external hidden addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8
+// CHECK: @c8 = external hidden addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4
// CHECK: @CBArrays.str = private unnamed_addr constant [9 x i8] c"CBArrays\00", align 1
typedef uint32_t4 uint32_t8[2];
@@ -110,8 +191,7 @@ cbuffer CBTypedefArray : register(space2) {
T2 t2[2];
}
-// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray,
-// CHECK-SAME: 128, 0, 64))
+// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", %__cblayout_CBTypedefArray)
// CHECK: @t1 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16
// CHECK: @t2 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16
// CHECK: @CBTypedefArray.str = private unnamed_addr constant [15 x i8] c"CBTypedefArray\00", align 1
@@ -135,13 +215,12 @@ struct D {
Empty es;
};
-// CHECK: @CBStructs.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs,
-// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240))
-// CHECK: @a = external hidden addrspace(2) global target("dx.Layout", %A, 8, 0), align 1
-// CHECK: @b = external hidden addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 1
-// CHECK: @c = external hidden addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 1
-// CHECK: @array_of_A = external hidden addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 1
-// CHECK: @d = external hidden addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 1
+// CHECK: @CBStructs.cb = global target("dx.CBuffer", %__cblayout_CBStructs)
+// CHECK: @a = external hidden addrspace(2) global %A, align 1
+// CHECK: @b = external hidden addrspace(2) global %B, align 1
+// CHECK: @c = external hidden addrspace(2) global %C, align 1
+// CHECK: @array_of_A = external hidden addrspace(2) global <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, align 1
+// CHECK: @d = external hidden addrspace(2) global %__cblayout_D, align 1
// CHECK: @e = external hidden addrspace(2) global half, align 2
// CHECK: @f = external hidden addrspace(2) global <3 x i16>, align 8
// CHECK: @CBStructs.str = private unnamed_addr constant [10 x i8] c"CBStructs\00", align 1
@@ -176,27 +255,25 @@ cbuffer CBClasses {
K ka[10];
};
-// CHECK: @CBClasses.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses,
-// CHECK-SAME: 260, 0, 16, 32, 112))
-// CHECK: @k = external hidden addrspace(2) global target("dx.Layout", %K, 4, 0), align 1
-// CHECK: @l = external hidden addrspace(2) global target("dx.Layout", %L, 8, 0, 4), align 1
-// CHECK: @m = external hidden addrspace(2) global target("dx.Layout", %M, 68, 0), align 1
-// CHECK: @ka = external hidden addrspace(2) global [10 x target("dx.Layout", %K, 4, 0)], align 1
+// CHECK: @CBClasses.cb = global target("dx.CBuffer", %__cblayout_CBClasses)
+// CHECK: @k = external hidden addrspace(2) global %K, align 1
+// CHECK: @l = external hidden addrspace(2) global %L, align 1
+// CHECK: @m = external hidden addrspace(2) global %M, align 1
+// CHECK: @ka = external hidden addrspace(2) global <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>, align 1
// CHECK: @CBClasses.str = private unnamed_addr constant [10 x i8] c"CBClasses\00", align 1
struct Test {
float a, b;
};
-// CHECK: @CBMix.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix,
-// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
-// CHECK: @test = external hidden addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 1
+// CHECK: @CBMix.cb = global target("dx.CBuffer", %__cblayout_CBMix)
+// CHECK: @test = external hidden addrspace(2) global <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, align 1
// CHECK: @f1 = external hidden addrspace(2) global float, align 4
-// CHECK: @f2 = external hidden addrspace(2) global [3 x [2 x <2 x float>]], align 8
+// CHECK: @f2 = external hidden addrspace(2) global <{ [2 x <{ <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>], <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> }>, align 8
// CHECK: @f3 = external hidden addrspace(2) global float, align 4
-// CHECK: @f4 = external hidden addrspace(2) global target("dx.Layout", %anon, 4, 0), align 1
+// CHECK: @f4 = external hidden addrspace(2) global %anon, align 1
// CHECK: @f5 = external hidden addrspace(2) global double, align 8
-// CHECK: @f6 = external hidden addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 1
+// CHECK: @f6 = external hidden addrspace(2) global %anon.0, align 1
// CHECK: @f7 = external hidden addrspace(2) global float, align 4
// CHECK: @f8 = external hidden addrspace(2) global <1 x double>, align 8
// CHECK: @f9 = external hidden addrspace(2) global i16, align 2
@@ -215,7 +292,7 @@ cbuffer CBMix {
uint16_t f9;
};
-// CHECK: @CB_A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182))
+// CHECK: @CB_A.cb = global target("dx.CBuffer", %__cblayout_CB_A)
cbuffer CB_A {
double B0[2];
@@ -229,7 +306,7 @@ cbuffer CB_A {
half3 B8;
}
-// CHECK: @CB_B.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88))
+// CHECK: @CB_B.cb = global target("dx.CBuffer", %__cblayout_CB_B)
cbuffer CB_B {
double3 B9[3];
half3 B10;
@@ -264,7 +341,7 @@ struct G {
half C3;
};
-// CHECK: @CB_C.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392))
+// CHECK: @CB_C.cb = global target("dx.CBuffer", %__cblayout_CB_C)
cbuffer CB_C {
int D0;
F D1;
@@ -275,63 +352,63 @@ cbuffer CB_C {
// CHECK: define internal void @_init_buffer_CBScalars.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48))
-// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
+// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", %__cblayout_CBScalars)
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBScalarsst(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBScalars) %CBScalars.cb_h, ptr @CBScalars.cb, align 4
// CHECK: define internal void @_init_buffer_CBVectors.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBVectorss_136_0_16_40_48_80_96_112tt(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) %CBVectors.cb_h, ptr @CBVectors.cb, align 4
+// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", %__cblayout_CBVectors)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBVectorsst(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBVectors) %CBVectors.cb_h, ptr @CBVectors.cb, align 4
// CHECK: define internal void @_init_buffer_CBArrays.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656))
-// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
+// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", %__cblayout_CBArrays)
+// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBArraysst(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBArrays) %CBArrays.cb_h, ptr @CBArrays.cb, align 4
// CHECK: define internal void @_init_buffer_CBTypedefArray.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBTypedefArrays_128_0_64tt(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4
+// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", %__cblayout_CBTypedefArray)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBTypedefArrayst(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBTypedefArray) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4
// CHECK: define internal void @_init_buffer_CBStructs.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBStructs.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBStructss_246_0_16_32_64_144_238_240tt(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) %CBStructs.cb_h, ptr @CBStructs.cb, align 4
+// CHECK-NEXT: %CBStructs.cb_h = call target("dx.CBuffer", %__cblayout_CBStructs)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBStructsst(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBStructs) %CBStructs.cb_h, ptr @CBStructs.cb, align 4
// CHECK: define internal void @_init_buffer_CBClasses.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBClassess_260_0_16_32_112tt(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) %CBClasses.cb_h, ptr @CBClasses.cb, align 4
+// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", %__cblayout_CBClasses)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBClassesst(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBClasses) %CBClasses.cb_h, ptr @CBClasses.cb, align 4
// CHECK: define internal void @_init_buffer_CBMix.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBMixs_170_0_24_32_120_128_136_144_152_160_168tt(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) %CBMix.cb_h, ptr @CBMix.cb, align 4
+// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", %__cblayout_CBMix)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBMixst(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBMix) %CBMix.cb_h, ptr @CBMix.cb, align 4
// CHECK: define internal void @_init_buffer_CB_A.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_As_188_0_32_76_80_120_128_144_160_182tt(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) %CB_A.cb_h, ptr @CB_A.cb, align 4
+// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", %__cblayout_CB_A)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Ast(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_A) %CB_A.cb_h, ptr @CB_A.cb, align 4
// CHECK: define internal void @_init_buffer_CB_B.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Bs_94_0_88tt(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) %CB_B.cb_h, ptr @CB_B.cb, align 4
+// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", %__cblayout_CB_B)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Bst(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_B) %CB_B.cb_h, ptr @CB_B.cb, align 4
// CHECK: define internal void @_init_buffer_CB_C.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392))
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Cs_400_0_16_112_128_392tt(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str)
-// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) %CB_C.cb_h, ptr @CB_C.cb, align 4
+// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", %__cblayout_CB_C)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Cst(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str)
+// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_C) %CB_C.cb_h, ptr @CB_C.cb, align 4
RWBuffer<float> Buf;
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
index b7bdce32e6507..1fe0a68261c94 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl
@@ -4,18 +4,18 @@
// CHECK: %"n0::n1::__cblayout_A" = type <{ float }>
// CHECK: %"n0::__cblayout_B" = type <{ float }>
-// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }>
+// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Padding", 12), %"n0::Foo" }>
// CHECK: %"n0::Foo" = type <{ float }>
-// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0))
+// CHECK: @A.cb = global target("dx.CBuffer", %"n0::n1::__cblayout_A")
// CHECK: @_ZN2n02n11aE = external hidden addrspace(2) global float, align 4
-// CHECK: @B.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0))
+// CHECK: @B.cb = global target("dx.CBuffer", %"n0::__cblayout_B")
// CHECK: @_ZN2n01aE = external hidden addrspace(2) global float, align 4
-// CHECK: @C.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16))
+// CHECK: @C.cb = global target("dx.CBuffer", %"n0::n2::__cblayout_C")
// CHECK: @_ZN2n02n21aE = external hidden addrspace(2) global float, align 4
-// CHECK: external hidden addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 1
+// CHECK: external hidden addrspace(2) global %"n0::Foo", align 1
namespace n0 {
struct Foo {
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
index 7bedd63c9f65d..c39fd75ec6ee4 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl
@@ -2,10 +2,13 @@
// RUN: dxil-pc-shadermodel6.3-compute %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s
-// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }>
-// CHECK: %__cblayout_CB_1 = type <{ float, <2 x float> }>
+// TODO: Reordering fields doesn't work...
+// XFAIL: *
-// CHECK: @CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK: %__cblayout_CB = type <{ [16 x i8], float, [68 x i8], <2 x i32>, [72 x i8], double }>
+// CHECK: %__cblayout_CB_1 = type <{ [80 x i8], <2 x float>, float }>
+
+// CHECK: @CB.cb = global target("dx.CBuffer", %__cblayout_CB)
// CHECK: @a = external hidden addrspace(2) global float, align 4
// CHECK: @b = external hidden addrspace(2) global double, align 8
// CHECK: @c = external hidden addrspace(2) global <2 x i32>, align 8
@@ -17,7 +20,7 @@ cbuffer CB : register(b1, space3) {
int2 c : packoffset(c5.z);
}
-// CHECK: @CB.cb.1 = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_1, 92, 88, 80))
+// CHECK: @CB.cb.1 = global target("dx.CBuffer", %__cblayout_CB_1)
// CHECK: @x = external hidden addrspace(2) global float, align 4
// CHECK: @y = external hidden addrspace(2) global <2 x float>, align 8
@@ -30,7 +33,7 @@ cbuffer CB : register(b0) {
// CHECK: define internal void @_init_buffer_CB.cb()
// CHECK-NEXT: entry:
-// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88))
+// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB)
// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, ptr @CB.str)
float foo() {
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
index fa3405df9e3d3..b8c7babb8d634 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl
@@ -2,7 +2,7 @@
// CHECK: %__cblayout_A = type <{ float }>
-// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0))
+// CHECK: @A.cb = global target("dx.CBuffer", %__cblayout_A)
// CHECK: @a = external hidden addrspace(2) global float, align 4
// CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4
// CHECK-NOT: @B.cb
diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
index ad4d92f8afc02..5333dad962195 100644
--- a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
+++ b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl
@@ -1,19 +1,18 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,DXIL
// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,SPIRV
-// DXIL: %"__cblayout_$Globals" = type <{ float, float, target("dx.Layout", %__cblayout_S, 4, 0) }>
-// SPIRV: %"__cblayout_$Globals" = type <{ float, float, target("spirv.Layout", %__cblayout_S, 4, 0) }>
+// CHECK: %"__cblayout_$Globals" = type <{ float, float, target("{{.*}}.Padding", 8), %__cblayout_S }>
// CHECK: %__cblayout_S = type <{ float }>
-// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 20, 0, 4, 16))
+// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", %"__cblayout_$Globals")
// DXIL-DAG: @a = external hidden addrspace(2) global float
// DXIL-DAG: @g = external hidden addrspace(2) global float
-// DXIL-DAG: @h = external hidden addrspace(2) global target("dx.Layout", %__cblayout_S, 4, 0), align 4
+// DXIL-DAG: @h = external hidden addrspace(2) global %__cblayout_S, align 4
-// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 20, 0, 4, 16), 2, 0)
+// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0)
// SPIRV-DAG: @a = external hidden addrspace(12) global float
// SPIRV-DAG: @g = external hidden addrspace(12) global float
-// SPIRV-DAG: @h = external hidden addrspace(12) global target("spirv.Layout", %__cblayout_S, 4, 0), align 8
+// SPIRV-DAG: @h = external hidden addrspace(12) global %__cblayout_S, align 8
struct EmptyStruct {
};
diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
index 1b2cb0e99aa83..fee3b77c32dd2 100644
--- a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
+++ b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl
@@ -13,6 +13,9 @@
// CHECK-DAG: @e = external hidden addrspace(2) global <4 x float>, align 16
// CHECK-DAG: @s = external hidden addrspace(2) global target("dx.Layout", %S, 8, 0), align 1
+// TODO: Reordering fields doesn't work...
+// XFAIL: *
+
struct S {
float2 v;
};
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index f253e02f4cdd9..db7d4a4342eb7 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -277,7 +277,7 @@ Examples:
Accessing Resources as Memory
-----------------------------
-*relevant types: Buffers and Textures*
+*relevant types: Buffers, Textures, and CBuffers*
Loading and storing from resources is generally represented in LLVM using
operations on memory that is only accessible via a handle object. Given a
@@ -302,14 +302,14 @@ stores are described later in this document.
-
- Pointer
- A pointer to an object in the buffer
- * - ``%buffer``
+ * - ``%resource``
- 0
- - ``target(dx.TypedBuffer, ...)``
- - The buffer to access
+ - Any buffer, texture, or cbuffer type
+ - The resource to access
* - ``%index``
- 1
- ``i32``
- - Index into the buffer
+ - Index into the resource
Examples:
diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
index 694a7fa854576..f4e232ffe1745 100644
--- a/llvm/include/llvm/Frontend/HLSL/CBuffer.h
+++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
@@ -46,7 +46,8 @@ class CBufferMetadata {
CBufferMetadata(NamedMDNode *MD) : MD(MD) {}
public:
- static std::optional<CBufferMetadata> get(Module &M);
+ static std::optional<CBufferMetadata>
+ get(Module &M, llvm::function_ref<bool(Type *)> IsPadding);
using iterator = SmallVector<CBufferMapping>::iterator;
iterator begin() { return Mappings.begin(); }
@@ -55,9 +56,6 @@ class CBufferMetadata {
void eraseFromModule();
};
-APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
- ArrayType *Ty);
-
} // namespace hlsl
} // namespace llvm
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index 1e4a1a5020b0b..744bf9fcd32a7 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -15,25 +15,28 @@
using namespace llvm;
using namespace llvm::hlsl;
-static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) {
+static SmallVector<size_t>
+getMemberOffsets(const DataLayout &DL, GlobalVariable *Handle,
+ llvm::function_ref<bool(Type *)> IsPadding) {
+ SmallVector<size_t> Offsets;
+
auto *HandleTy = cast<TargetExtType>(Handle->getValueType());
assert((HandleTy->getName().ends_with(".CBuffer") ||
HandleTy->getName() == "spirv.VulkanBuffer") &&
"Not a cbuffer type");
assert(HandleTy->getNumTypeParameters() == 1 && "Expected layout type");
+ auto *LayoutTy = cast<StructType>(HandleTy->getTypeParameter(0));
- auto *LayoutTy = cast<TargetExtType>(HandleTy->getTypeParameter(0));
- assert(LayoutTy->getName().ends_with(".Layout") && "Not a layout type");
-
- // Skip the "size" parameter.
- size_t ParamIndex = Index + 1;
- assert(LayoutTy->getNumIntParameters() > ParamIndex &&
- "Not enough parameters");
+ const StructLayout *SL = DL.getStructLayout(LayoutTy);
+ for (int I = 0, E = LayoutTy->getNumElements(); I < E; ++I)
+ if (!IsPadding(LayoutTy->getElementType(I)))
+ Offsets.push_back(SL->getElementOffset(I));
- return LayoutTy->getIntParameter(ParamIndex);
+ return Offsets;
}
-std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
+std::optional<CBufferMetadata>
+CBufferMetadata::get(Module &M, llvm::function_ref<bool(Type *)> IsPadding) {
NamedMDNode *CBufMD = M.getNamedMetadata("hlsl.cbs");
if (!CBufMD)
return std::nullopt;
@@ -52,13 +55,16 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle);
+ SmallVector<size_t> MemberOffsets =
+ getMemberOffsets(M.getDataLayout(), Handle, IsPadding);
+
for (int I = 1, E = MD->getNumOperands(); I < E; ++I) {
Metadata *OpMD = MD->getOperand(I);
// Some members may be null if they've been optimized out.
if (!OpMD)
continue;
auto *V = cast<GlobalVariable>(cast<ValueAsMetadata>(OpMD)->getValue());
- Mapping.Members.emplace_back(V, getMemberOffset(Handle, I - 1));
+ Mapping.Members.emplace_back(V, MemberOffsets[I - 1]);
}
}
@@ -69,10 +75,3 @@ void CBufferMetadata::eraseFromModule() {
// Remove the cbs named metadata
MD->eraseFromParent();
}
-
-APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
- ArrayType *Ty) {
- int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8;
- int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes));
- return Offset.udiv(TypeSize) * RoundUp;
-}
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 0e9535d24a4cc..b060a6c4f3bf6 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -1008,6 +1008,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
}
if (Name == "spirv.IntegralConstant" || Name == "spirv.Literal")
return TargetTypeInfo(Type::getVoidTy(C));
+ if (Name == "spirv.Padding")
+ return TargetTypeInfo(
+ ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)),
+ TargetExtType::CanBeGlobal);
if (Name.starts_with("spirv."))
return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit,
TargetExtType::CanBeGlobal,
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index 44277971acd60..56245321a522a 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -8,11 +8,13 @@
#include "DXILCBufferAccess.h"
#include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/Frontend/HLSL/CBuffer.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/FormatVariadic.h"
@@ -21,297 +23,41 @@
#define DEBUG_TYPE "dxil-cbuffer-access"
using namespace llvm;
-namespace {
-/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
-struct CBufferRowIntrin {
- Intrinsic::ID IID;
- Type *RetTy;
- unsigned int EltSize;
- unsigned int NumElts;
-
- CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
- assert(Ty == Ty->getScalarType() && "Expected scalar type");
-
- switch (DL.getTypeSizeInBits(Ty)) {
- case 16:
- IID = Intrinsic::dx_resource_load_cbufferrow_8;
- RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
- EltSize = 2;
- NumElts = 8;
- break;
- case 32:
- IID = Intrinsic::dx_resource_load_cbufferrow_4;
- RetTy = StructType::get(Ty, Ty, Ty, Ty);
- EltSize = 4;
- NumElts = 4;
- break;
- case 64:
- IID = Intrinsic::dx_resource_load_cbufferrow_2;
- RetTy = StructType::get(Ty, Ty);
- EltSize = 8;
- NumElts = 2;
- break;
- default:
- llvm_unreachable("Only 16, 32, and 64 bit types supported");
- }
- }
-};
-
-// Helper for creating CBuffer handles and loading data from them
-struct CBufferResource {
- GlobalVariable *GVHandle;
- GlobalVariable *Member;
- size_t MemberOffset;
-
- LoadInst *Handle;
-
- CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member,
- size_t MemberOffset)
- : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
-
- const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); }
- Type *getValueType() { return Member->getValueType(); }
- iterator_range<ConstantDataSequential::user_iterator> users() {
- return Member->users();
- }
-
- /// Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
- /// `Val` can either be Member itself, or a GEP of a constant offset from
- /// Member
- size_t getOffsetForCBufferGEP(Value *Val) {
- assert(isa<PointerType>(Val->getType()) &&
- "Expected a pointer-typed value");
-
- if (Val == Member)
- return 0;
-
- if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
- // Since we should always have a constant offset, we should only ever have
- // a single GEP of indirection from the Global.
- assert(GEP->getPointerOperand() == Member &&
- "Indirect access to resource handle");
-
- const DataLayout &DL = getDataLayout();
- APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
- bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
- (void)Success;
- assert(Success && "Offsets into cbuffer globals must be constant");
-
- if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
- ConstantOffset =
- hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
-
- return ConstantOffset.getZExtValue();
- }
-
- llvm_unreachable("Expected Val to be a GlobalVariable or GEP");
- }
-
- /// Create a handle for this cbuffer resource using the IRBuilder `Builder`
- /// and sets the handle as the current one to use for subsequent calls to
- /// `loadValue`
- void createAndSetCurrentHandle(IRBuilder<> &Builder) {
- Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle,
- GVHandle->getName());
+static void replaceUsersOfGlobal(GlobalVariable *Global,
+ GlobalVariable *HandleGV, size_t Offset) {
+ for (Use &U : make_early_inc_range(Global->uses())) {
+ auto *UseInst = dyn_cast<Instruction>(U.getUser());
+ // TODO: Constants? Metadata?
+ assert(UseInst && "Non-instruction use of cbuffer");
+
+ IRBuilder<> Builder(UseInst);
+ LoadInst *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV,
+ HandleGV->getName());
+ Value *Ptr = Builder.CreateIntrinsic(
+ Global->getType(), Intrinsic::dx_resource_getpointer,
+ ArrayRef<Value *>{Handle,
+ ConstantInt::get(Builder.getInt32Ty(), Offset)});
+ U.set(Ptr);
}
- /// Load a value of type `Ty` at offset `Offset` using the handle from the
- /// last call to `createAndSetCurrentHandle`
- Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset,
- const Twine &Name = "") {
- assert(Handle &&
- "Expected a handle for this cbuffer global resource to be created "
- "before loading a value from it");
- const DataLayout &DL = getDataLayout();
-
- size_t TargetOffset = MemberOffset + Offset;
- CBufferRowIntrin Intrin(DL, Ty->getScalarType());
- // The cbuffer consists of some number of 16-byte rows.
- unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
- unsigned int CurrentIndex =
- (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
-
- auto *CBufLoad = Builder.CreateIntrinsic(
- Intrin.RetTy, Intrin.IID,
- {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
- Name + ".load");
- auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
- Name + ".extract");
-
- Value *Result = nullptr;
- unsigned int Remaining =
- ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
-
- if (Remaining == 0) {
- // We only have a single element, so we're done.
- Result = Elt;
-
- // However, if we loaded a <1 x T>, then we need to adjust the type here.
- if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
- assert(VT->getNumElements() == 1 &&
- "Can't have multiple elements here");
- Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
- Builder.getInt32(0), Name);
- }
- return Result;
- }
-
- // Walk each element and extract it, wrapping to new rows as needed.
- SmallVector<Value *> Extracts{Elt};
- while (Remaining--) {
- CurrentIndex %= Intrin.NumElts;
-
- if (CurrentIndex == 0)
- CBufLoad = Builder.CreateIntrinsic(
- Intrin.RetTy, Intrin.IID,
- {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
- nullptr, Name + ".load");
-
- Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
- Name + ".extract"));
- }
-
- // Finally, we build up the original loaded value.
- Result = PoisonValue::get(Ty);
- for (int I = 0, E = Extracts.size(); I < E; ++I)
- Result =
- Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I),
- Name + formatv(".upto{}", I));
- return Result;
- }
-};
-
-} // namespace
-
-/// Replace load via cbuffer global with a load from the cbuffer handle itself.
-static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand());
- IRBuilder<> Builder(LI);
- CBR.createAndSetCurrentHandle(Builder);
- Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName());
- LI->replaceAllUsesWith(Result);
- DeadInsts.push_back(LI);
-}
-
-/// This function recursively copies N array elements from the cbuffer resource
-/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
-/// arrays into a sequence of scalar/vector extracts and stores.
-static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI,
- CBufferResource &CBR, ArrayType *ArrTy,
- size_t ArrOffset, size_t N,
- const Twine &Name = "") {
- const DataLayout &DL = MCI->getDataLayout();
- Type *ElemTy = ArrTy->getElementType();
- size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
- for (unsigned I = 0; I < N; ++I) {
- size_t Offset = ArrOffset + I * ElemTySize;
-
- // Recursively copy nested arrays
- if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
- copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset,
- ElemArrTy->getNumElements(), Name);
- continue;
- }
-
- // Load CBuffer value and store it in Dest
- APInt CBufArrayOffset(
- DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
- CBufArrayOffset =
- hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
- Value *CBufferVal =
- CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
- Value *GEP =
- Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
- {Builder.getInt32(Offset)}, Name + ".dest");
- Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
- }
-}
-
-/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
-/// itself. Assumes the cbuffer global is an array, and the length of bytes to
-/// copy is divisible by array element allocation size.
-/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
-static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) {
-
- ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
- assert(ArrTy && "MemCpy lowering is only supported for array types");
-
- // This assumption vastly simplifies the implementation
- if (MCI->getSource() != CBR.Member)
- reportFatalUsageError(
- "Expected MemCpy source to be a cbuffer global variable");
-
- ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
- uint64_t ByteLength = Length->getZExtValue();
-
- // If length to copy is zero, no memcpy is needed
- if (ByteLength == 0) {
- MCI->eraseFromParent();
- return;
- }
-
- const DataLayout &DL = CBR.getDataLayout();
-
- Type *ElemTy = ArrTy->getElementType();
- size_t ElemSize = DL.getTypeAllocSize(ElemTy);
- assert(ByteLength % ElemSize == 0 &&
- "Length of bytes to MemCpy must be divisible by allocation size of "
- "source/destination array elements");
- size_t ElemsToCpy = ByteLength / ElemSize;
-
- IRBuilder<> Builder(MCI);
- CBR.createAndSetCurrentHandle(Builder);
-
- copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy,
- "memcpy." + MCI->getDest()->getName() + "." +
- MCI->getSource()->getName());
-
- MCI->eraseFromParent();
-}
-
-static void replaceAccessesWithHandle(CBufferResource &CBR) {
- SmallVector<WeakTrackingVH> DeadInsts;
-
- SmallVector<User *> ToProcess{CBR.users()};
- while (!ToProcess.empty()) {
- User *Cur = ToProcess.pop_back_val();
-
- // If we have a load instruction, replace the access.
- if (auto *LI = dyn_cast<LoadInst>(Cur)) {
- replaceLoad(LI, CBR, DeadInsts);
- continue;
- }
-
- // If we have a memcpy instruction, replace it with multiple accesses and
- // subsequent stores to the destination
- if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
- replaceMemCpy(MCI, CBR);
- continue;
- }
-
- // Otherwise, walk users looking for a load...
- if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
- ToProcess.append(Cur->user_begin(), Cur->user_end());
- continue;
- }
-
- llvm_unreachable("Unexpected user of Global");
- }
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ Global->removeFromParent();
}
static bool replaceCBufferAccesses(Module &M) {
- std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+ std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(
+ M, [](Type *Ty) { return isa<llvm::dxil::PaddingExtType>(Ty); });
if (!CBufMD)
return false;
+ SmallVector<Constant *> CBufferGlobals;
+ for (const hlsl::CBufferMapping &Mapping : *CBufMD)
+ for (const hlsl::CBufferMember &Member : Mapping.Members)
+ CBufferGlobals.push_back(Member.GV);
+ convertUsersOfConstantsToInstructions(CBufferGlobals);
+
for (const hlsl::CBufferMapping &Mapping : *CBufMD)
- for (const hlsl::CBufferMember &Member : Mapping.Members) {
- CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset);
- replaceAccessesWithHandle(CBR);
- Member.GV->removeFromParent();
- }
+ for (const hlsl::CBufferMember &Member : Mapping.Members)
+ replaceUsersOfGlobal(Member.GV, Mapping.Handle, Member.Offset);
CBufMD->eraseFromModule();
return true;
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index bcf84403b2c0d..be3d762379fd9 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -109,9 +109,9 @@ class DirectXPassConfig : public TargetPassConfig {
void addCodeGenPrepare() override {
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createGlobalDCEPass());
+ addPass(createDXILCBufferAccessLegacyPass());
addPass(createDXILResourceAccessLegacyPass());
addPass(createDXILIntrinsicExpansionLegacyPass());
- addPass(createDXILCBufferAccessLegacyPass());
addPass(createDXILDataScalarizationLegacyPass());
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
index 3ca0b40cac93e..329774df554f4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp
@@ -54,7 +54,12 @@ static Instruction *findHandleDef(GlobalVariable *HandleVar) {
}
static bool replaceCBufferAccesses(Module &M) {
- std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+ std::optional<hlsl::CBufferMetadata> CBufMD =
+ hlsl::CBufferMetadata::get(M, [](Type *Ty) {
+ if (auto *TET = dyn_cast<TargetExtType>(Ty))
+ return TET->getName() == "spirv.Padding";
+ return false;
+ });
if (!CBufMD)
return false;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index a151fd2fbdb7a..b32856e836230 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -835,9 +835,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(
if (Ty->isArrayTy())
Ty = Ty->getArrayElementType();
else {
- TargetExtType *BufferTy = cast<TargetExtType>(Ty);
- assert(BufferTy->getTargetExtName() == "spirv.Layout");
- Ty = BufferTy->getTypeParameter(0);
assert(Ty && Ty->isStructTy());
uint32_t Index = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
Ty = cast<StructType>(Ty)->getElementType(Index);
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
deleted file mode 100644
index 52ad0f3df1aba..0000000000000
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
-
-; cbuffer CB : register(b0) {
-; float a1[3];
-; }
-%__cblayout_CB = type <{ [3 x float] }>
-
- at CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
-; CHECK: @CB.cb =
-; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external addrspace(2) global [3 x float], align 4
-
-; CHECK: define void @f
-define void @f(ptr %dst) {
-entry:
- %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, ptr null)
- store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: store float [[X]], ptr %dst
- %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4
- store float %a1, ptr %dst, align 32
-
- ret void
-}
-
-; CHECK-NOT: !hlsl.cbs =
-!hlsl.cbs = !{!0}
-
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
index db4e14c1336a6..47c9a094686fb 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
@@ -1,124 +1,54 @@
; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+; TODO: Remove datalayout.
+; This hack forces dxil-compatible alignment of 3-element 32- and 64-bit vectors
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v96:32:32-v192:64:64"
+
; cbuffer CB : register(b0) {
-; float a1[3];
-; double3 a2[2];
-; float16_t a3[2][2];
-; uint64_t a4[3];
-; int4 a5[2][3][4];
-; uint16_t a6[1];
-; int64_t a7[2];
-; bool a8[4];
+; float a1[3]; // offset 0, size 4 (+12) * 3
+; double3 a2[2]; // offset 48, size 24 (+8) * 2
+; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4
; }
-%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
+%__cblayout_CB = type <{
+ <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+ <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+ <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14)
+}>
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
; CHECK: @CB.cb =
; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
- at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
- at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
- at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
- at a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
- at a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
- at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
- at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
+ at a1 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4
+ at a2 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32
+ at a3 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, align 2
; CHECK: define void @f
define void @f(ptr %dst) {
entry:
- %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
- store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: store float [[X]], ptr %dst
- %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+ ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0)
+ ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16
+ %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
store float %a1, ptr %dst, align 32
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
- ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
- ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
- ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
- ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
- ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
+ ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 48)
+ ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 32
%a2 = load <3 x double>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a2, i32 32), align 8
%a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
store <3 x double> %a2, ptr %a2.i, align 32
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
- ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32
- ; CHECK: store half [[X]], ptr [[PTR]]
- %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2
+ ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 112)
+ ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16
+ %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 16), align 2
%a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32
store half %a3, ptr %a3.i, align 2
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
- ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
- ; CHECK: store i64 [[X]], ptr [[PTR]]
- %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8
- %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
- store i64 %a4, ptr %a4.i, align 8
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26)
- ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
- ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
- ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
- ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
- ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
- ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
- %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4
- %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
- store <4 x i32> %a5, ptr %a5.i, align 4
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38)
- ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64
- ; CHECK: store i16 [[X]], ptr [[PTR]]
- %a6 = load i16, ptr addrspace(2) @a6, align 2
- %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64
- store i16 %a6, ptr %a6.i, align 2
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40)
- ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
- ; CHECK: store i64 [[X]], ptr [[PTR]]
- %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8
- %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
- store i64 %a7, ptr %a7.i, align 8
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 42)
- ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80
- ; CHECK: store i32 [[X]], ptr [[PTR]]
- %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2
- %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80
- store i32 %a8, ptr %a8.i, align 4
-
ret void
}
; CHECK-NOT: !hlsl.cbs =
!hlsl.cbs = !{!0}
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
-!1 = !{i32 0, i32 2}
-!2 = !{}
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
deleted file mode 100644
index d7272b449166d..0000000000000
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
-
-%__cblayout_CB = type <{ float }>
-
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison
-; CHECK: @CB.cb =
-; CHECK-NOT: external {{.*}} addrspace(2) global
- at x = external local_unnamed_addr addrspace(2) global float, align 4
-
-; CHECK: define void @f
-define void @f(ptr %dst) {
-entry:
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: store float [[X]], ptr %dst
- %x = load float, ptr addrspace(2) @x, align 4
- store float %x, ptr %dst, align 4
- ret void
-}
-
-; CHECK-NOT: !hlsl.cbs =
-!hlsl.cbs = !{!0}
-
-!0 = !{ptr @CB.cb, ptr addrspace(2) @x}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
index abe087dbe6100..8ea6c1c459b5b 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
@@ -3,28 +3,24 @@
; cbuffer CB : register(b0) {
; float a1[3];
; }
-%__cblayout_CB = type <{ [3 x float] }>
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
- at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
; CHECK: @CB.cb =
; CHECK-NOT: external {{.*}} addrspace(2) global
- at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+ at a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4
; CHECK: define void @f
define void @f(ptr %dst) {
entry:
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: store float [[X]], ptr %dst
- %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+ ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0)
+ ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16
+ %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
store float %a1, ptr %dst, align 32
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: store float [[X]], ptr %dst
- %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+ ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0)
+ ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16
+ %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4
store float %a2, ptr %dst, align 32
ret void
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
deleted file mode 100644
index f1486f974fb36..0000000000000
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
+++ /dev/null
@@ -1,216 +0,0 @@
-; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
-
-; cbuffer CB : register(b0) {
-; float a1[3];
-; double3 a2[2];
-; float16_t a3[2][2];
-; uint64_t a4[3];
-; int2 a5[3][2];
-; uint16_t a6[1];
-; int64_t a7[2];
-; bool a8[4];
-; }
-%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }>
-
-@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison
-@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
-@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
-@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
-@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
-@a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16
-@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
-@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
-@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
-
-; CHECK: define void @f(
-define void @f(ptr %dst) {
-entry:
- %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
- store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4
-
- %a1.copy = alloca [3 x float], align 4
- %a2.copy = alloca [2 x <3 x double>], align 32
- %a3.copy = alloca [2 x [2 x half]], align 2
- %a4.copy = alloca [3 x i64], align 8
- %a5.copy = alloca [3 x [2 x <2 x i32>]], align 16
- %a6.copy = alloca [1 x i16], align 2
- %a7.copy = alloca [2 x i64], align 8
- %a8.copy = alloca [4 x i32], align 4
-
- ; Try copying no elements
-; CHECK-NOT: memcpy
- call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 0, i1 false)
-
- ; Try copying only the first element
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
-; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
-; CHECK: store float [[X]], ptr [[DEST]], align 4
- call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 4, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
-; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
-; CHECK: store float [[X]], ptr [[DEST]], align 4
-; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
-; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4
-; CHECK: store float [[Y]], ptr [[DEST]], align 4
-; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
-; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8
-; CHECK: store float [[Z]], ptr [[DEST]], align 4
- call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
-; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
-; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
-; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
-; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
-; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
-; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
-; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
-; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
-; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24
-; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
- call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
-; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0
-; CHECK: store half [[X]], ptr [[DEST]], align 2
-; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
-; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2
-; CHECK: store half [[Y]], ptr [[DEST]], align 2
-; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9)
-; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4
-; CHECK: store half [[X]], ptr [[DEST]], align 2
-; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10)
-; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6
-; CHECK: store half [[Y]], ptr [[DEST]], align 2
- call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11)
-; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0
-; CHECK: store i64 [[X]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
-; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8
-; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13)
-; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16
-; CHECK: store i64 [[Z]], ptr [[DEST]], align 8
- call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0
-; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8
-; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16
-; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24
-; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32
-; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
-; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
-; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40
-; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
- call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
-; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0
-; CHECK: store i16 [[X]], ptr [[DEST]], align 2
- call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
-; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0
-; CHECK: store i64 [[X]], ptr [[DEST]], align 8
-; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
-; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8
-; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
- call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false)
-
-; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20)
-; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0
-; CHECK: store i32 [[X]], ptr [[DEST]], align 4
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21)
-; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4
-; CHECK: store i32 [[Y]], ptr [[DEST]], align 4
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22)
-; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8
-; CHECK: store i32 [[Z]], ptr [[DEST]], align 4
-; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23)
-; CHECK: [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
-; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12
-; CHECK: store i32 [[W]], ptr [[DEST]], align 4
- call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false)
-
- ret void
-}
-
-declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg)
-
-; CHECK-NOT: !hlsl.cbs =
-!hlsl.cbs = !{!0}
-
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
-!1 = !{i32 0, i32 2}
-!2 = !{}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
index 7857c25d69636..7cc0faac617c7 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
@@ -4,97 +4,40 @@
; float a1; // offset 0, size 4
; int a2; // offset 4, size 4
; bool a3; // offset 8, size 4
-; float16_t a4; // offset 12, size 2
-; uint16_t a5; // offset 14, size 2
-; double a6; // offset 16, size 8
-; int64_t a7; // offset 24, size 8
; }
-%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }>
+%__cblayout_CB = type <{ float, i32, i32 }>
-@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
; CHECK: @CB.cb =
; CHECK-NOT: external {{.*}} addrspace(2) global
@a1 = external local_unnamed_addr addrspace(2) global float, align 4
@a2 = external local_unnamed_addr addrspace(2) global i32, align 4
@a3 = external local_unnamed_addr addrspace(2) global i32, align 4
-@a4 = external local_unnamed_addr addrspace(2) global half, align 2
-@a5 = external local_unnamed_addr addrspace(2) global i16, align 2
-@a6 = external local_unnamed_addr addrspace(2) global double, align 8
-@a7 = external local_unnamed_addr addrspace(2) global i64, align 8
; CHECK: define void @f
define void @f(ptr %dst) {
entry:
- %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
- store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: store float [[A1]], ptr %dst
+ ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0)
%a1 = load float, ptr addrspace(2) @a1, align 4
store float %a1, ptr %dst, align 8
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
- ; CHECK: store i32 [[A2]], ptr [[PTR]]
+ ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 4)
%a2 = load i32, ptr addrspace(2) @a2, align 4
%a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
store i32 %a2, ptr %a2.i, align 4
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
- ; CHECK: store i32 [[A3]], ptr [[PTR]]
+ ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 8)
%a3 = load i32, ptr addrspace(2) @a3, align 4, !range !1, !noundef !2
%a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
store i32 %a3, ptr %a3.i, align 8
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12
- ; CHECK: store half [[A4]], ptr [[PTR]]
- %a4 = load half, ptr addrspace(2) @a4, align 2
- %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12
- store half %a4, ptr %a4.i, align 4
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14
- ; CHECK: store i16 [[A5]], ptr [[PTR]]
- %a5 = load i16, ptr addrspace(2) @a5, align 2
- %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14
- store i16 %a5, ptr %a5.i, align 2
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
- ; CHECK: store double [[A6]], ptr [[PTR]]
- %a6 = load double, ptr addrspace(2) @a6, align 8
- %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
- store double %a6, ptr %a6.i, align 8
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24
- ; CHECK: store i64 [[A7]], ptr [[PTR]]
- %a7 = load i64, ptr addrspace(2) @a7, align 8
- %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24
- store i64 %a7, ptr %a7.i, align 8
-
ret void
}
; CHECK-NOT: !hlsl.cbs =
!hlsl.cbs = !{!0}
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7}
-!1 = !{i32 0, i32 2}
-!2 = !{}
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
deleted file mode 100644
index 4160008a986af..0000000000000
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
+++ /dev/null
@@ -1,119 +0,0 @@
-; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
-
-; cbuffer CB {
-; float3 a1; // offset 0, size 12 (+4)
-; double3 a2; // offset 16, size 24
-; float16_t2 a3; // offset 40, size 4 (+4)
-; uint64_t3 a4; // offset 48, size 24 (+8)
-; int4 a5; // offset 80, size 16
-; uint16_t3 a6; // offset 96, size 6 (+10)
-; };
-%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }>
-
-@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison
-; CHECK: @CB.cb =
-; CHECK-NOT: external {{.*}} addrspace(2) global
-@a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16
-@a2 = external local_unnamed_addr addrspace(2) global <3 x double>, align 32
-@a3 = external local_unnamed_addr addrspace(2) global <2 x half>, align 4
-@a4 = external local_unnamed_addr addrspace(2) global <3 x i64>, align 32
-@a5 = external local_unnamed_addr addrspace(2) global <4 x i32>, align 16
-@a6 = external local_unnamed_addr addrspace(2) global <3 x i16>, align 8
-
-; CHECK: define void @f
-define void @f(ptr %dst) {
-entry:
- %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
- store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
- ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1
- ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2
- ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2
- ; CHECK: store <3 x float> [[VEC2]], ptr %dst
- %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16
- store <3 x float> %a1, ptr %dst, align 4
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
- ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
- ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
- ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
- ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
- ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
- %a2 = load <3 x double>, ptr addrspace(2) @a2, align 32
- %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
- store <3 x double> %a2, ptr %a2.i, align 8
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
- ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4
- ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5
- ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
- ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]]
- %a3 = load <2 x half>, ptr addrspace(2) @a3, align 4
- %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
- store <2 x half> %a3, ptr %a3.i, align 2
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
- ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
- ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
- ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
- ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
- ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]]
- %a4 = load <3 x i64>, ptr addrspace(2) @a4, align 32
- %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
- store <3 x i64> %a4, ptr %a4.i, align 8
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
- ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
- ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
- ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
- ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
- ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
- ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
- %a5 = load <4 x i32>, ptr addrspace(2) @a5, align 16
- %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
- store <4 x i32> %a5, ptr %a5.i, align 4
-
- ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
- ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
- ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
- ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1
- ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2
- ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0
- ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1
- ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2
- ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88
- ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]]
- %a6 = load <3 x i16>, ptr addrspace(2) @a6, align 8
- %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88
- store <3 x i16> %a6, ptr %a6.i, align 2
-
- ret void
-}
-
-; CHECK-NOT: !hlsl.cbs =
-!hlsl.cbs = !{!0}
-
-!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 13c25396ea98f..fc078a6159ac4 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -15,10 +15,10 @@
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: DXIL Finalize Linkage
; CHECK-NEXT: Dead Global Elimination
+; CHECK-NEXT: DXIL CBuffer Access
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: DXIL Resource Access
; CHECK-NEXT: DXIL Intrinsic Expansion
-; CHECK-NEXT: DXIL CBuffer Access
; CHECK-NEXT: DXIL Data Scalarization
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
index 6d41875798ebc..8f6166b7232c3 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll
@@ -21,7 +21,7 @@
%MyStruct = type { <4 x float> }
%__cblayout_MyCBuffer = type <{ %MyStruct, <4 x float> }>
-@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) poison
+@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison
@s = external hidden local_unnamed_addr addrspace(12) global %MyStruct, align 16
@v = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16
@MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1
@@ -30,10 +30,10 @@
define void @main() {
entry:
; CHECK: %[[tmp:[0-9]+]] = OpCopyObject %[[wrapper_ptr_t]] %[[MyCBuffer]]
- %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_MyCBuffers_32_0_16t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str)
- store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8
+ %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str)
+ store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8
%0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_3t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
-
+
; CHECK: %[[tmp_ptr:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[tmp]] %[[uint_0]] %[[uint_0]]
; CHECK: %[[v_ptr:.+]] = OpAccessChain %[[_ptr_Uniform_v4float]] %[[tmp]] %[[uint_0]] %[[uint_1]]
; CHECK: %[[s_ptr_gep:[0-9]+]] = OpInBoundsAccessChain %[[_ptr_Uniform_float]] %[[tmp_ptr]] %[[uint_0]] %[[uint_1]]
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
index c365452a9b404..670548d3d3e27 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll
@@ -14,12 +14,12 @@
%__cblayout_PartiallyUsedCBuffer = type <{ float, i32 }>
%__cblayout_AnotherCBuffer = type <{ <4 x float>, <4 x float> }>
-@UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) poison
+@UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) poison
@UnusedCBuffer.str = private unnamed_addr constant [14 x i8] c"UnusedCBuffer\00", align 1
-@PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) poison
+@PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) poison
@used_member = external hidden local_unnamed_addr addrspace(12) global float, align 4
@PartiallyUsedCBuffer.str = private unnamed_addr constant [21 x i8] c"PartiallyUsedCBuffer\00", align 1
-@AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) poison
+@AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) poison
@a = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16
@AnotherCBuffer.str = private unnamed_addr constant [15 x i8] c"AnotherCBuffer\00", align 1
@.str = private unnamed_addr constant [7 x i8] c"output\00", align 1
@@ -28,18 +28,18 @@
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none)
define void @main() local_unnamed_addr #1 {
entry:
- %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_UnusedCBuffers_4_0t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str)
- store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8
+ %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str)
+ store target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8
; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[PartiallyUsedCBuffer]]
; CHECK: %[[used_member_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0:[0-9]+]]
- %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_PartiallyUsedCBuffers_8_0_4t_2_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str)
- store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8
+ %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str)
+ store target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8
; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[AnotherCBuffer]]
; CHECK: %[[a_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0]]
- %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_AnotherCBuffers_32_0_16t_2_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str)
- store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8
+ %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str)
+ store target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8
%0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32 3, i32 0, i32 1, i32 0, ptr nonnull @.str)
%2 = load float, ptr addrspace(12) @used_member, align 4
>From df4150e64dd552ce60703440f776582a9582bc05 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Fri, 26 Sep 2025 15:00:52 -0700
Subject: [PATCH 6/6] [DirectX] Drop dx.Layout extension type
TODO: This must go in after the frontend changes
---
llvm/include/llvm/Analysis/DXILResource.h | 21 -----
llvm/lib/Analysis/DXILResource.cpp | 13 ---
.../DirectX/Metadata/cbuffer-layouttype.ll | 82 -------------------
3 files changed, 116 deletions(-)
delete mode 100644 llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index c7aff167324e6..bcbca78504041 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -222,27 +222,6 @@ class AnyResourceExtType : public TargetExtType {
}
};
-/// The dx.Layout target extension type
-///
-/// `target("dx.Layout", <Type>, <size>, [offsets...])`
-class LayoutExtType : public TargetExtType {
-public:
- LayoutExtType() = delete;
- LayoutExtType(const LayoutExtType &) = delete;
- LayoutExtType &operator=(const LayoutExtType &) = delete;
-
- Type *getWrappedType() const { return getTypeParameter(0); }
- uint32_t getSize() const { return getIntParameter(0); }
- uint32_t getOffsetOfElement(int I) const { return getIntParameter(I + 1); }
-
- static bool classof(const TargetExtType *T) {
- return T->getName() == "dx.Layout";
- }
- static bool classof(const Type *T) {
- return isa<TargetExtType>(T) && classof(cast<TargetExtType>(T));
- }
-};
-
/// The dx.Padding target extension type
///
/// `target("dx.Padding", NumBytes)`
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 6f19a68dcd194..14d9925b4297a 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -380,13 +380,6 @@ StructType *ResourceTypeInfo::createElementStruct(StringRef CBufferName) {
Name.append(CBufferName);
}
- // TODO: Remove this when we update the frontend to use explicit padding.
- if (LayoutExtType *LayoutType =
- dyn_cast<LayoutExtType>(RTy->getResourceType())) {
- StructType *Ty = cast<StructType>(LayoutType->getWrappedType());
- return StructType::create(Ty->elements(), Name);
- }
-
return getOrCreateElementStruct(
getTypeWithoutPadding(RTy->getResourceType()), Name);
}
@@ -498,13 +491,7 @@ ResourceTypeInfo::UAVInfo ResourceTypeInfo::getUAV() const {
uint32_t ResourceTypeInfo::getCBufferSize(const DataLayout &DL) const {
assert(isCBuffer() && "Not a CBuffer");
-
Type *ElTy = cast<CBufferExtType>(HandleTy)->getResourceType();
-
- // TODO: Remove this when we update the frontend to use explicit padding.
- if (auto *LayoutTy = dyn_cast<LayoutExtType>(ElTy))
- return LayoutTy->getSize();
-
return DL.getTypeAllocSize(ElTy);
}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
deleted file mode 100644
index 85952c9ae4e83..0000000000000
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer-layouttype.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; TODO: Remove this test once we've updated the frontend to use explicit
-; padding. The cbuffer-metadata.ll test covers the newer logic.
-
-; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
-; RUN: opt -S --passes="dxil-pretty-printer" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
-; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
-
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.6-compute"
-
-%__cblayout_CB1 = type <{ float, i32, double, <2 x i32> }>
-@CB1.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16)) poison
-@CB1.str = private unnamed_addr constant [4 x i8] c"CB1\00", align 1
-
-%__cblayout_CB2 = type <{ float, double, float, half, i16, i64, i32 }>
-@CB2.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32)) poison
-@CB2.str = private unnamed_addr constant [4 x i8] c"CB2\00", align 1
-
-%__cblayout_MyConstants = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }>
-@MyConstants.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)) poison
-@MyConstants.str = private unnamed_addr constant [12 x i8] c"MyConstants\00", align 1
-
-; PRINT:; Resource Bindings:
-; PRINT-NEXT:;
-; PRINT-NEXT:; Name Type Format Dim ID HLSL Bind Count
-; PRINT-NEXT:; ----
-; PRINT-NEXT:; CB1 cbuffer NA NA CB0 cb0 1
-; PRINT-NEXT:; CB2 cbuffer NA NA CB1 cb1 1
-; PRINT-NEXT:; MyConstants cbuffer NA NA CB2 cb5,space15 1
-
-define void @test() #0 {
-
- ; cbuffer CB1 : register(b0) {
- ; float a;
- ; int b;
- ; double c;
- ; int2 d;
- ; }
- %CB1.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0, 4, 8, 16))
- @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr @CB1.str)
- ; cbuffer CB2 : register(b0) {
- ; float a;
- ; double b;
- ; float c;
- ; half d;
- ; uint16_t e;
- ; int64_t f;
- ; int g;
- ;}
-
- %CB2.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 36, 0, 8, 16, 20, 22, 24, 32))
- @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr @CB2.str)
- ; cbuffer CB3 : register(b5) {
- ; double B0;
- ; float3 B1;
- ; float B2;
- ; double3 B3;
- ; half B4;
- ; double2 B5;
- ; float B6;
- ; half3 B7;
- ; half3 B8;
- ; }
- %CB3.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_MyConstants, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90))
- @llvm.dx.resource.handlefrombinding(i32 15, i32 5, i32 1, i32 0, ptr @MyConstants.str)
-
- ret void
-}
-
-attributes #0 = { noinline nounwind "hlsl.shader"="compute" }
-
-; CHECK: @CB1 = external constant %CBuffer.CB1
-; CHECK: @CB2 = external constant %CBuffer.CB2
-; CHECK: @MyConstants = external constant %CBuffer.MyConstants
-
-; CHECK: !dx.resources = !{[[ResList:[!][0-9]+]]}
-
-; CHECK: [[ResList]] = !{null, null, [[CBList:[!][0-9]+]], null}
-; CHECK: [[CBList]] = !{![[CB1:[0-9]+]], ![[CB2:[0-9]+]], ![[MYCONSTANTS:[0-9]+]]}
-; CHECK: ![[CB1]] = !{i32 0, ptr @CB1, !"CB1", i32 0, i32 0, i32 1, i32 24, null}
-; CHECK: ![[CB2]] = !{i32 1, ptr @CB2, !"CB2", i32 0, i32 1, i32 1, i32 36, null}
-; CHECK: ![[MYCONSTANTS]] = !{i32 2, ptr @MyConstants, !"MyConstants", i32 15, i32 5, i32 1, i32 96, null}
More information about the cfe-commits
mailing list