[llvm] [DirectX] Avoid precalculating GEPs in DXILResourceAccess (PR #172720)
Justin Bogner via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 18 13:44:07 PST 2025
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/172720
>From a936c53ba6aa30c2742b14f014e3ae4a9887b6e8 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Mon, 15 Dec 2025 15:44:35 -0700
Subject: [PATCH 1/3] [DirectX] Avoid precalculating GEPs in DXILResourceAccess
Instead of trying to precalculate GEP offsets ahead of time and then
process resource accesses based off of these offsets, traverse the GEP
chain inline for each access. This makes it easier to get the types
correct when translating GEPs for cbuffer and structured buffer
accesses, which in turn lets us access individual elements of those
structures directly.
Fixes #160208, #164517, and #169430
---
.../lib/Target/DirectX/DXILResourceAccess.cpp | 233 +++++++++++-------
.../load-cbuffer-array-of-struct.ll | 17 ++
.../ResourceAccess/load-cbuffer-dynamic.ll | 25 +-
.../ResourceAccess/load-structbuf-geps.ll | 129 ++++++++++
.../ResourceAccess/load_typedbuffer.ll | 2 +-
.../ResourceAccess/store-structbuf-geps.ll | 95 +++++++
.../ResourceAccess/store_typedbuffer.ll | 2 +-
7 files changed, 398 insertions(+), 105 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load-structbuf-geps.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/store-structbuf-geps.ll
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 057d87bc3c6a9..234cba5997305 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -28,59 +28,78 @@
using namespace llvm;
-static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
- dxil::ResourceTypeInfo &RTI) {
- assert(!PrevOffset && "Non-constant GEP chains not handled yet");
-
- const DataLayout &DL = GEP->getDataLayout();
-
- uint64_t ScalarSize = 1;
- if (RTI.isTyped()) {
- Type *ContainedType = RTI.getHandleTy()->getTypeParameter(0);
- // We need the size of an element in bytes so that we can calculate the
- // offset in elements given a total offset in bytes.
- Type *ScalarType = ContainedType->getScalarType();
- ScalarSize = DL.getTypeSizeInBits(ScalarType) / 8;
- }
-
- APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
- if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
- APInt Scaled = ConstantOffset.udiv(ScalarSize);
- return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
- }
+static Value *traverseGEPOffsets(const DataLayout &DL, IRBuilder<> &Builder,
+ Value *Ptr, uint64_t AccessSize) {
+ Value *Offset = nullptr;
+
+ while (Ptr) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Ptr)) {
+ assert(II->getIntrinsicID() == Intrinsic::dx_resource_getpointer &&
+ "Resource access through unexpected intrinsic");
+ return Offset ? Offset : ConstantInt::get(Builder.getInt32Ty(), 0);
+ }
- unsigned NumIndices = GEP->getNumIndices();
-
- // If we have a single index we're indexing into a top level array. This
- // generally only happens with cbuffers.
- if (NumIndices == 1)
- return *GEP->idx_begin();
-
- // If we have two indices, this should be a simple access through a pointer.
- if (NumIndices == 2) {
- auto IndexIt = GEP->idx_begin();
- assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
- "GEP is not indexing through pointer");
- ++IndexIt;
- Value *Offset = *IndexIt;
- assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
- return Offset;
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ assert(GEP && "Resource access through unexpected instruction");
+
+ unsigned NumIndices = GEP->getNumIndices();
+ uint64_t IndexScale = DL.getTypeAllocSize(GEP->getSourceElementType());
+ APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ Value *GEPOffset;
+ if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
+ // We have a constant offset (in bytes).
+ GEPOffset =
+ ConstantInt::get(DL.getIndexType(GEP->getType()), ConstantOffset);
+ IndexScale = 1;
+ } else if (NumIndices == 1) {
+ // If we have a single index we're indexing into a top level array. This
+ // generally only happens with cbuffers.
+ GEPOffset = *GEP->idx_begin();
+ } else if (NumIndices == 2) {
+ // If we have two indices, this should be an access through a pointer.
+ auto IndexIt = GEP->idx_begin();
+ assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+ "GEP is not indexing through pointer");
+ GEPOffset = *(++IndexIt);
+ assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+ } else
+ llvm_unreachable("Unhandled GEP structure for resource access");
+
+ uint64_t ElemSize = AccessSize;
+ if (!(IndexScale % ElemSize)) {
+ // If our scale is an exact multiple of the access size, adjust the
+ // scaling to avoid an unnecessary division.
+ IndexScale /= ElemSize;
+ ElemSize = 1;
+ }
+ if (IndexScale != 1)
+ GEPOffset = Builder.CreateMul(
+ GEPOffset, ConstantInt::get(Builder.getInt32Ty(), IndexScale));
+ if (ElemSize != 1)
+ GEPOffset = Builder.CreateUDiv(
+ GEPOffset, ConstantInt::get(Builder.getInt32Ty(), ElemSize));
+
+ Offset = Offset ? Builder.CreateAdd(Offset, GEPOffset) : GEPOffset;
+ Ptr = GEP->getPointerOperand();
}
- llvm_unreachable("Unhandled GEP structure for resource access");
+ llvm_unreachable("GEP of null pointer?");
}
static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
- Value *Offset, dxil::ResourceTypeInfo &RTI) {
+ dxil::ResourceTypeInfo &RTI) {
+ const DataLayout &DL = SI->getDataLayout();
IRBuilder<> Builder(SI);
Type *ContainedType = RTI.getHandleTy()->getTypeParameter(0);
+ Type *ScalarType = ContainedType->getScalarType();
Type *LoadType = StructType::get(ContainedType, Builder.getInt1Ty());
Value *V = SI->getValueOperand();
if (V->getType() == ContainedType) {
// V is already the right type.
- assert(!Offset && "store of whole element has offset?");
- } else if (V->getType() == ContainedType->getScalarType()) {
+ assert(SI->getPointerOperand() == II &&
+ "store of whole element has offset?");
+ } else if (V->getType() == ScalarType) {
// We're storing a scalar, so we need to load the current value and only
// replace the relevant part.
auto *Load = Builder.CreateIntrinsic(
@@ -88,9 +107,9 @@ static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
{II->getOperand(0), II->getOperand(1)});
auto *Struct = Builder.CreateExtractValue(Load, {0});
- // If we have an offset from seeing a GEP earlier, use that. Otherwise, 0.
- if (!Offset)
- Offset = ConstantInt::get(Builder.getInt32Ty(), 0);
+ uint64_t AccessSize = DL.getTypeSizeInBits(ScalarType) / 8;
+ Value *Offset =
+ traverseGEPOffsets(DL, Builder, SI->getPointerOperand(), AccessSize);
V = Builder.CreateInsertElement(Struct, V, Offset);
} else {
llvm_unreachable("Store to typed resource has invalid type");
@@ -102,12 +121,15 @@ static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
SI->replaceAllUsesWith(Inst);
}
-static void createRawStore(IntrinsicInst *II, StoreInst *SI, Value *Offset) {
+static void createRawStore(IntrinsicInst *II, StoreInst *SI) {
+ const DataLayout &DL = SI->getDataLayout();
IRBuilder<> Builder(SI);
- if (!Offset)
- Offset = ConstantInt::get(Builder.getInt32Ty(), 0);
Value *V = SI->getValueOperand();
+ // The offset for the rawbuffer load and store ops is always in bytes.
+ uint64_t AccessSize = 1;
+ Value *Offset =
+ traverseGEPOffsets(DL, Builder, SI->getPointerOperand(), AccessSize);
// TODO: break up larger types
auto *Inst = Builder.CreateIntrinsic(
Builder.getVoidTy(), Intrinsic::dx_resource_store_rawbuffer,
@@ -116,13 +138,13 @@ static void createRawStore(IntrinsicInst *II, StoreInst *SI, Value *Offset) {
}
static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI,
- Value *Offset, dxil::ResourceTypeInfo &RTI) {
+ dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
case dxil::ResourceKind::TypedBuffer:
- return createTypedBufferStore(II, SI, Offset, RTI);
+ return createTypedBufferStore(II, SI, RTI);
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
- return createRawStore(II, SI, Offset);
+ return createRawStore(II, SI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -148,7 +170,8 @@ static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI,
}
static void createTypedBufferLoad(IntrinsicInst *II, LoadInst *LI,
- Value *Offset, dxil::ResourceTypeInfo &RTI) {
+ dxil::ResourceTypeInfo &RTI) {
+ const DataLayout &DL = LI->getDataLayout();
IRBuilder<> Builder(LI);
Type *ContainedType = RTI.getHandleTy()->getTypeParameter(0);
Type *LoadType = StructType::get(ContainedType, Builder.getInt1Ty());
@@ -158,7 +181,12 @@ static void createTypedBufferLoad(IntrinsicInst *II, LoadInst *LI,
{II->getOperand(0), II->getOperand(1)});
V = Builder.CreateExtractValue(V, {0});
- if (Offset)
+ Type *ScalarType = ContainedType->getScalarType();
+ uint64_t AccessSize = DL.getTypeSizeInBits(ScalarType) / 8;
+ Value *Offset =
+ traverseGEPOffsets(DL, Builder, LI->getPointerOperand(), AccessSize);
+ auto *ConstantOffset = dyn_cast<ConstantInt>(Offset);
+ if (!ConstantOffset || !ConstantOffset->isZero())
V = Builder.CreateExtractElement(V, Offset);
// If we loaded a <1 x ...> instead of a scalar (presumably to feed a
@@ -171,12 +199,17 @@ static void createTypedBufferLoad(IntrinsicInst *II, LoadInst *LI,
LI->replaceAllUsesWith(V);
}
-static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
+static void createRawLoad(IntrinsicInst *II, LoadInst *LI) {
+ const DataLayout &DL = LI->getDataLayout();
IRBuilder<> Builder(LI);
+
// TODO: break up larger types
Type *LoadType = StructType::get(LI->getType(), Builder.getInt1Ty());
- if (!Offset)
- Offset = ConstantInt::get(Builder.getInt32Ty(), 0);
+
+ // The offset for the rawbuffer load and store ops is always in bytes.
+ uint64_t AccessSize = 1;
+ Value *Offset =
+ traverseGEPOffsets(DL, Builder, LI->getPointerOperand(), AccessSize);
Value *V =
Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_rawbuffer,
{II->getOperand(0), II->getOperand(1), Offset});
@@ -222,7 +255,7 @@ struct CBufferRowIntrin {
};
} // namespace
-static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI,
dxil::ResourceTypeInfo &RTI) {
const DataLayout &DL = LI->getDataLayout();
@@ -238,25 +271,45 @@ static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
assert(GlobalOffset && "CBuffer getpointer index must be constant");
- unsigned int FixedOffset = GlobalOffset->getZExtValue();
- // If we have a further constant offset we can just fold it in to the fixed
- // offset.
- if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
- FixedOffset += ConstOffset->getZExtValue();
- Offset = nullptr;
- }
-
+ uint64_t GlobalOffsetVal = GlobalOffset->getZExtValue();
Value *CurrentRow = ConstantInt::get(
- Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
- unsigned int CurrentIndex =
- (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
-
- assert(!(CurrentIndex && Offset) &&
- "Dynamic indexing into elements of cbuffer rows is not supported");
- // At this point if we have a non-constant offset it has to be an array
- // offset, so we can assume that it's a multiple of the row size.
- if (Offset)
- CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+ Builder.getInt32Ty(), GlobalOffsetVal / hlsl::CBufferRowSizeInBytes);
+ unsigned int CurrentIndex;
+
+ // Every object in a cbuffer either fits in a row or is aligned to a row. This
+ // means that only the very last pointer access can point into a row.
+ auto *LastGEP = dyn_cast<GEPOperator>(LI->getPointerOperand());
+ if (!LastGEP) {
+ assert(LI->getPointerOperand() == II &&
+ "Unexpected indirect access to resource without GEP");
+ CurrentIndex =
+ (GlobalOffsetVal % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+ } else {
+ Value *GEPOffset = traverseGEPOffsets(
+ DL, Builder, LastGEP->getPointerOperand(), hlsl::CBufferRowSizeInBytes);
+ CurrentRow = Builder.CreateAdd(GEPOffset, CurrentRow);
+
+ APInt ConstantOffset(DL.getIndexTypeSizeInBits(LastGEP->getType()), 0);
+ if (LastGEP->accumulateConstantOffset(DL, ConstantOffset)) {
+ APInt Remainder(DL.getIndexTypeSizeInBits(LastGEP->getType()),
+ hlsl::CBufferRowSizeInBytes);
+ APInt::udivrem(ConstantOffset, Remainder, ConstantOffset, Remainder);
+ CurrentRow = Builder.CreateAdd(
+ CurrentRow, ConstantInt::get(Builder.getInt32Ty(), ConstantOffset));
+ CurrentIndex = Remainder.udiv(Intrin.EltSize).getZExtValue();
+ } else {
+ assert(LastGEP->getNumIndices() == 1 &&
+ "Last GEP of cbuffer access is not array or struct access");
+ // We assume a non-constant access will be row-aligned. This is safe
+ // because arrays and structs are always row aligned, and accesses to
+ // vector elements will show up as a load of the vector followed by an
+ // extractelement.
+ CurrentRow = cast<ConstantInt>(CurrentRow)->isZero()
+ ? *LastGEP->idx_begin()
+ : Builder.CreateAdd(CurrentRow, *LastGEP->idx_begin());
+ CurrentIndex = 0;
+ }
+ }
auto *CBufLoad = Builder.CreateIntrinsic(
Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
@@ -306,16 +359,16 @@ static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
LI->replaceAllUsesWith(Result);
}
-static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI,
dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
case dxil::ResourceKind::TypedBuffer:
- return createTypedBufferLoad(II, LI, Offset, RTI);
+ return createTypedBufferLoad(II, LI, RTI);
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
- return createRawLoad(II, LI, Offset);
+ return createRawLoad(II, LI);
case dxil::ResourceKind::CBuffer:
- return createCBufferLoad(II, LI, Offset, RTI);
+ return createCBufferLoad(II, LI, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -446,35 +499,27 @@ static void phiNodeReplacement(IntrinsicInst *II,
}
static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) {
- // Process users keeping track of indexing accumulated from GEPs.
- struct AccessAndOffset {
- User *Access;
- Value *Offset;
- };
- SmallVector<AccessAndOffset> Worklist;
+ SmallVector<User *> Worklist;
for (User *U : II->users())
- Worklist.push_back({U, nullptr});
+ Worklist.push_back(U);
SmallVector<Instruction *> DeadInsts;
while (!Worklist.empty()) {
- AccessAndOffset Current = Worklist.back();
+ User *U = Worklist.back();
Worklist.pop_back();
- if (auto *GEP = dyn_cast<GetElementPtrInst>(Current.Access)) {
- IRBuilder<> Builder(GEP);
-
- Value *Offset = calculateGEPOffset(GEP, Current.Offset, RTI);
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
for (User *U : GEP->users())
- Worklist.push_back({U, Offset});
+ Worklist.push_back(U);
DeadInsts.push_back(GEP);
- } else if (auto *SI = dyn_cast<StoreInst>(Current.Access)) {
+ } else if (auto *SI = dyn_cast<StoreInst>(U)) {
assert(SI->getValueOperand() != II && "Pointer escaped!");
- createStoreIntrinsic(II, SI, Current.Offset, RTI);
+ createStoreIntrinsic(II, SI, RTI);
DeadInsts.push_back(SI);
- } else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
- createLoadIntrinsic(II, LI, Current.Offset, RTI);
+ } else if (auto *LI = dyn_cast<LoadInst>(U)) {
+ createLoadIntrinsic(II, LI, RTI);
DeadInsts.push_back(LI);
} else
llvm_unreachable("Unhandled instruction - pointer escaped?");
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
index 22fba8c1d5f8c..aef3cd70bc670 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
@@ -55,5 +55,22 @@ entry:
%v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
store i32 %v_load, ptr %v.i, align 4
+ ;; v[idx].q
+ ;
+ ; CHECK: [[MUL0:%.*]] = mul i32 %idx, 2
+ ; TODO: It would be nice to combine adds, but this is fine for correctness.
+ ; CHECK: [[ADD0:%.*]] = add i32 [[MUL0]], 3
+ ; CHECK: [[ADD1:%.*]] = add i32 [[ADD0]], 1
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 [[ADD1]])
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ ; CHECK: store i32 [[X]], ptr [[PTR]]
+ %v_dyn_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %v_dyn_cbuf.i = getelementptr <{ %S, target("dx.Padding", 8) }>, ptr addrspace(2) %v_dyn_ptr, i32 %idx
+ %v_dyn_q.i = getelementptr inbounds nuw i8, ptr addrspace(2) %v_dyn_cbuf.i, i32 20
+ %v_dyn_load = load i32, ptr addrspace(2) %v_dyn_q.i, align 4
+ %v_dyn.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ store i32 %v_dyn_load, ptr %v_dyn.i, align 4
+
ret void
}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll
index 7daebaed70442..5dbe1cb88e8b2 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-dynamic.ll
@@ -3,15 +3,15 @@
; Tests for dynamic indices into arrays in cbuffers.
; cbuffer CB : register(b0) {
-; uint s[10]; // offset 0, size 4 (+12) * 10
-; uint t[12]; // offset 160, size 4 (+12) * 12
+; uint3 s[10]; // offset 0, size 12 (+4) * 10
+; uint t[12]; // offset 160, size 4 (+12) * 12
; }
-%__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12), <{ [11 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>
+%__cblayout_CB = type <{ <{ [9 x <{ <3 x i32>, target("dx.Padding", 4) }>], <3 x i32> }>, target("dx.Padding", 4), <{ [11 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>
@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
; CHECK: define void @f
-define void @f(ptr %dst, i32 %idx) {
+define void @f(ptr %dst, i32 %idx, i32 %elemidx) {
entry:
%CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
@@ -19,15 +19,22 @@ entry:
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
%CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
- ;; s[idx]
+ ;; s[idx][elemidx]
;
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 %idx)
- ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[EXTRACT0:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[EXTRACT1:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[EXTRACT2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: [[UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EXTRACT0]], i32 0
+ ; CHECK: [[UPTO1:%.*]] = insertelement <3 x i32> [[UPTO0]], i32 [[EXTRACT1]], i32 1
+ ; CHECK: [[UPTO2:%.*]] = insertelement <3 x i32> [[UPTO1]], i32 [[EXTRACT2]], i32 2
+ ; CHECK: [[X:%.*]] = extractelement <3 x i32> [[UPTO2]], i32 %elemidx
; CHECK: store i32 [[X]], ptr %dst
%s_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
- %s_gep = getelementptr <{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) %s_ptr, i32 %idx
- %s_load = load i32, ptr addrspace(2) %s_gep, align 4
- store i32 %s_load, ptr %dst, align 4
+ %s_gep = getelementptr <{ <3 x i32>, target("dx.Padding", 4) }>, ptr addrspace(2) %s_ptr, i32 %idx
+ %s_load = load <3 x i32>, ptr addrspace(2) %s_gep, align 16
+ %s_extract = extractelement <3 x i32> %s_load, i32 %elemidx
+ store i32 %s_extract, ptr %dst, align 4
;; t[idx]
;
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-structbuf-geps.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-structbuf-geps.ll
new file mode 100644
index 0000000000000..b7ec0104a1623
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-structbuf-geps.ll
@@ -0,0 +1,129 @@
+; RUN: opt -S -dxil-resource-access %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.4-compute"
+
+; struct S {
+; double d;
+; uint4 v;
+; };
+%struct.S = type <{ double, <4 x i32> }>
+
+; struct T {
+; uint x;
+; S s[16];
+; uint y;
+; };
+%struct.T = type { i32, [16 x %struct.S], i32 }
+
+declare void @i32_user(i32)
+declare void @double_user(double)
+declare void @half_user(half)
+
+; CHECK-LABEL: define void @load_offset_i32
+define void @load_offset_i32(i32 %idx) {
+ %buffer = call target("dx.RawBuffer", %struct.S, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<S> In;
+ ;; In[0].v[idx];
+ ;
+ ; Here we use a GEP to access the vector, even though that isn't necessarily
+ ; what the clang frontend will codegen.
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %idx, 4
+ ; CHECK: [[ADD:%.*]] = add i32 [[MUL]], 8
+ ; CHECK: [[LOAD:%.*]] = call { i32, i1 } @llvm.dx.resource.load.rawbuffer.i32.tdx.RawBuffer_s_struct.Ss_0_0t(target("dx.RawBuffer", %struct.S, 0, 0) %buffer, i32 0, i32 [[ADD]])
+ ; CHECK: [[VAL:%.*]] = extractvalue { i32, i1 } [[LOAD]], 0
+ ; CHECK: call void @i32_user(i32 [[VAL]])
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", %struct.S, 0, 0) %buffer, i32 0)
+ %s.i = getelementptr inbounds nuw i8, ptr %ptr, i32 8
+ %v.i = getelementptr i32, ptr %s.i, i32 %idx
+ %elt = load i32, ptr %v.i
+ call void @i32_user(i32 %elt)
+
+ ;; StructuredBuffer<S> In;
+ ;; In[0].v[idx];
+ ;
+ ; This matches clang's codegen, using extractelement rather than a gep.
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.rawbuffer.v4i32.tdx.RawBuffer_s_struct.Ss_0_0t(target("dx.RawBuffer", %struct.S, 0, 0) %buffer, i32 0, i32 8)
+ ; CHECK: [[VEC:%.*]] = extractvalue { <4 x i32>, i1 } [[LOAD]], 0
+ ; CHECK: [[VAL:%.*]] = extractelement <4 x i32> [[VEC]], i32 %idx
+ ; CHECK: call void @i32_user(i32 [[VAL]])
+ %ptr2 = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", %struct.S, 0, 0) %buffer, i32 0)
+ %v2.i = getelementptr inbounds nuw i8, ptr %ptr, i32 8
+ %v2 = load <4 x i32>, ptr %v2.i
+ %elt2 = extractelement <4 x i32> %v2, i32 %idx
+ call void @i32_user(i32 %elt2)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @load_double
+define void @load_double(i32 %idx) {
+ %buffer = call target("dx.RawBuffer", <4 x double>, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<double4> In;
+ ;; In[0][idx];
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %idx, 8
+ ; CHECK: [[LOAD:%.*]] = call { double, i1 } @llvm.dx.resource.load.rawbuffer.f64.tdx.RawBuffer_v4f64_0_0t(target("dx.RawBuffer", <4 x double>, 0, 0) %buffer, i32 0, i32 [[MUL]])
+ ; CHECK: [[VAL:%.*]] = extractvalue { double, i1 } [[LOAD]], 0
+ ; CHECK: call void @double_user(double [[VAL]])
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", <4 x double>, 0, 0) %buffer, i32 0)
+ %v.i = getelementptr double, ptr %ptr, i32 %idx
+ %v = load double, ptr %v.i
+ call void @double_user(double %v)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @load_half
+define void @load_half(i32 %idx) {
+ %buffer = call target("dx.RawBuffer", <4 x half>, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<half4> In;
+ ;; In[0][idx];
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %idx, 2
+ ; CHECK: [[LOAD:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_v4f16_0_0t(target("dx.RawBuffer", <4 x half>, 0, 0) %buffer, i32 0, i32 [[MUL]])
+ ; CHECK: [[VAL:%.*]] = extractvalue { half, i1 } [[LOAD]], 0
+ ; CHECK: call void @half_user(half [[VAL]])
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", <4 x half>, 0, 0) %buffer, i32 0)
+ %v.i = getelementptr half, ptr %ptr, i32 %idx
+ %v = load half, ptr %v.i
+ call void @half_user(half %v)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @load_nested
+define void @load_nested(i32 %idx, i32 %arrayidx, i32 %vecidx) {
+ %buffer = call target("dx.RawBuffer", %struct.T, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<T> In;
+ ;; In[idx].s[arrayidx].v[vecidx];
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %arrayidx, 24
+ ; CHECK: [[ADD:%.*]] = add i32 12, [[MUL]]
+ ; CHECK: [[LOAD:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.rawbuffer.v4i32.tdx.RawBuffer_s_struct.Ts_0_0t(target("dx.RawBuffer", %struct.T, 0, 0) %buffer, i32 %idx, i32 [[ADD]])
+ ; CHECK: [[VEC:%.*]] = extractvalue { <4 x i32>, i1 } [[LOAD]], 0
+ ; CHECK: [[VAL:%.*]] = extractelement <4 x i32> [[VEC]], i32 %vecidx
+ ; CHECK: call void @i32_user(i32 [[VAL]])
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", %struct.T, 0, 0) %buffer, i32 %idx)
+ %s.i = getelementptr inbounds nuw %struct.S, ptr %ptr, i32 %arrayidx
+ %v.i = getelementptr inbounds nuw i8, ptr %s.i, i32 12
+ %v = load <4 x i32>, ptr %v.i
+ %elt = extractelement <4 x i32> %v, i32 %vecidx
+ call void @i32_user(i32 %elt)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
index d2e5584e42650..748021df8a000 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
@@ -30,7 +30,7 @@ define void @load_float4(i32 %index, i32 %elemindex) {
; CHECK: %[[LOAD:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
; CHECK: %[[VALUE:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0
; CHECK: extractelement <4 x float> %[[VALUE]], i32 %elemindex
- %dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex
+ %dynamic = getelementptr inbounds float, ptr %ptr, i32 %elemindex
%dyndata = load float, ptr %dynamic
call void @use_float(float %dyndata)
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/store-structbuf-geps.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/store-structbuf-geps.ll
new file mode 100644
index 0000000000000..bb447843f5e4c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/store-structbuf-geps.ll
@@ -0,0 +1,95 @@
+; RUN: opt -S -dxil-resource-access %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.4-compute"
+
+; struct S {
+; double d;
+; uint4 v;
+; };
+%struct.S = type <{ double, <4 x i32> }>
+
+; struct T {
+; uint x;
+; S s[16];
+; uint y;
+; };
+%struct.T = type { i32, [16 x %struct.S], i32 }
+
+; CHECK-LABEL: define void @store_offset_i32
+define void @store_offset_i32(i32 %idx, i32 %data) {
+ %buffer = call target("dx.RawBuffer", %struct.S, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<S> In;
+ ;; In[0].v[idx] = data;
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %idx, 4
+ ; CHECK: [[ADD:%.*]] = add i32 [[MUL]], 8
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_s_struct.Ss_0_0t.i32(target("dx.RawBuffer", %struct.S, 0, 0) %buffer, i32 0, i32 [[ADD]], i32 %data)
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", %struct.S, 0, 0) %buffer, i32 0)
+ %s.i = getelementptr inbounds nuw i8, ptr %ptr, i32 8
+ %v.i = getelementptr i32, ptr %s.i, i32 %idx
+ store i32 %data, ptr %v.i
+
+ ret void
+}
+
+; CHECK-LABEL: define void @store_double
+define void @store_double(i32 %idx, double %data) {
+ %buffer = call target("dx.RawBuffer", <4 x double>, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<double4> In;
+ ;; In[0][idx] = data;
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %idx, 8
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f64_0_0t.f64(target("dx.RawBuffer", <4 x double>, 0, 0) %buffer, i32 0, i32 [[MUL]], double %data)
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", <4 x double>, 0, 0) %buffer, i32 0)
+ %v.i = getelementptr double, ptr %ptr, i32 %idx
+ store double %data, ptr %v.i
+
+ ret void
+}
+
+; CHECK-LABEL: define void @store_half
+define void @store_half(i32 %idx, half %data) {
+ %buffer = call target("dx.RawBuffer", <4 x half>, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<half4> In;
+ ;; In[0][idx] = data;
+ ;
+ ; CHECK: [[MUL:%.*]] = mul i32 %idx, 2
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f16_0_0t.f16(target("dx.RawBuffer", <4 x half>, 0, 0) %buffer, i32 0, i32 [[MUL]], half %data)
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", <4 x half>, 0, 0) %buffer, i32 0)
+ %v.i = getelementptr half, ptr %ptr, i32 %idx
+ store half %data, ptr %v.i
+
+ ret void
+}
+
+; CHECK-LABEL: define void @store_nested
+define void @store_nested(i32 %idx, i32 %arrayidx, i32 %vecidx, i32 %data) {
+ %buffer = call target("dx.RawBuffer", %struct.T, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ;; StructuredBuffer<T> In;
+ ;; In[idx].s[arrayidx].v[vecidx] = data;
+ ;
+ ; CHECK: [[MUL0:%.*]] = mul i32 %vecidx, 4
+ ; CHECK: [[ADD0:%.*]] = add i32 [[MUL0]], 1
+ ; CHECK: [[MUL1:%.*]] = mul i32 %arrayidx, 24
+ ; CHECK: [[ADD1:%.*]] = add i32 [[ADD0]], [[MUL1]]
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_s_struct.Ts_0_0t.i32(target("dx.RawBuffer", %struct.T, 0, 0) %buffer, i32 %idx, i32 [[ADD1]], i32 %data)
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", %struct.T, 0, 0) %buffer, i32 %idx)
+ %s.i = getelementptr inbounds nuw %struct.S, ptr %ptr, i32 %arrayidx
+ %v.i = getelementptr inbounds nuw i8, ptr %s.i, i32 12
+ %i = getelementptr i32, ptr %v.i, i32 %vecidx
+ store i32 %data, ptr %i
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll
index f27fa65957e6a..88d96aa799425 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll
@@ -37,7 +37,7 @@ define void @store_float4(<4 x float> %data, i32 %index, i32 %elemindex) {
; CHECK: %[[VEC:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD]], 0
; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[VEC]], float %scalar, i32 %elemindex
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]])
- %dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex
+ %dynamic = getelementptr inbounds float, ptr %ptr, i32 %elemindex
store float %scalar, ptr %dynamic
ret void
>From 35dd3c773156e122f4f3e64978e96e7f3b107788 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 17 Dec 2025 14:50:01 -0700
Subject: [PATCH 2/3] assert message and comment improvements
---
llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 4 +++-
.../DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll | 3 ++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 234cba5997305..e762bdf10de92 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -98,7 +98,7 @@ static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
if (V->getType() == ContainedType) {
// V is already the right type.
assert(SI->getPointerOperand() == II &&
- "store of whole element has offset?");
+ "Store of whole element has mismatched address to store to");
} else if (V->getType() == ScalarType) {
// We're storing a scalar, so we need to load the current value and only
// replace the relevant part.
@@ -280,6 +280,8 @@ static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI,
// means that only the very last pointer access can point into a row.
auto *LastGEP = dyn_cast<GEPOperator>(LI->getPointerOperand());
if (!LastGEP) {
+ // If we don't have a GEP at all we're just accessing the resource through
+ // the result of getpointer directly.
assert(LI->getPointerOperand() == II &&
"Unexpected indirect access to resource without GEP");
CurrentIndex =
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
index aef3cd70bc670..94e2f39a1a20f 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
@@ -58,7 +58,8 @@ entry:
;; v[idx].q
;
; CHECK: [[MUL0:%.*]] = mul i32 %idx, 2
- ; TODO: It would be nice to combine adds, but this is fine for correctness.
+ ;; Note: It would be nice to combine consecutive adds, but this is fine as is
+ ;; as far as correctness is concerned.
; CHECK: [[ADD0:%.*]] = add i32 [[MUL0]], 3
; CHECK: [[ADD1:%.*]] = add i32 [[ADD0]], 1
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 [[ADD1]])
>From 80c91e5e02d2220bdbee79bf65808c1a8fe438c8 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Thu, 18 Dec 2025 14:43:59 -0700
Subject: [PATCH 3/3] Remove unhelpful assert
---
llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index e762bdf10de92..37a7c0f572c69 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -61,7 +61,6 @@ static Value *traverseGEPOffsets(const DataLayout &DL, IRBuilder<> &Builder,
assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
"GEP is not indexing through pointer");
GEPOffset = *(++IndexIt);
- assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
} else
llvm_unreachable("Unhandled GEP structure for resource access");
More information about the llvm-commits
mailing list