[llvm] [DirectX] Split long vectors in DXILResourceAccess (PR #184732)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 19:49:50 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-directx
Author: Justin Bogner (bogner)
<details>
<summary>Changes</summary>
The `resource.load` and `resource.store` intrinsics cannot handle vectors with
more than 4 elements. Split such accesses into multiple calls of at most 4
elements each.
Fixes #167542
---
Full diff: https://github.com/llvm/llvm-project/pull/184732.diff
3 Files Affected:
- (modified) llvm/lib/Target/DirectX/DXILResourceAccess.cpp (+88-33)
- (added) llvm/test/CodeGen/DirectX/ResourceAccess/load-rawbuffer-wide.ll (+71)
- (added) llvm/test/CodeGen/DirectX/ResourceAccess/store-rawbuffer-wide.ll (+58)
``````````diff
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 2896c68d11de7..f939dea85de6e 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
#include "DirectX.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DXILResource.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -142,8 +143,24 @@ static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
SI->replaceAllUsesWith(Inst);
}
-static void createRawStore(IntrinsicInst *II, StoreInst *SI,
- dxil::ResourceTypeInfo &RTI) {
+static void emitRawStore(IRBuilder<> &Builder, Value *Buffer, Value *Index,
+ Value *Offset, Value *V, dxil::ResourceTypeInfo &RTI) {
+ // For raw buffer (ie, HLSL's ByteAddressBuffer), we need to fold the access
+ // entirely into the index.
+ if (!RTI.isStruct()) {
+ auto *ConstantOffset = dyn_cast<ConstantInt>(Offset);
+ if (!ConstantOffset || !ConstantOffset->isZero())
+ Index = Builder.CreateAdd(Index, Offset);
+ Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
+ }
+
+ Builder.CreateIntrinsic(Builder.getVoidTy(),
+ Intrinsic::dx_resource_store_rawbuffer,
+ {Buffer, Index, Offset, V});
+}
+
+static void createRawStores(IntrinsicInst *II, StoreInst *SI,
+ dxil::ResourceTypeInfo &RTI) {
const DataLayout &DL = SI->getDataLayout();
IRBuilder<> Builder(SI);
@@ -157,19 +174,27 @@ static void createRawStore(IntrinsicInst *II, StoreInst *SI,
Value *Offset =
traverseGEPOffsets(DL, Builder, SI->getPointerOperand(), AccessSize);
- // For raw buffer (ie, HLSL's ByteAddressBuffer), we need to fold the access
- // entirely into the index.
- if (!RTI.isStruct()) {
- auto *ConstantOffset = dyn_cast<ConstantInt>(Offset);
- if (!ConstantOffset || !ConstantOffset->isZero())
- Index = Builder.CreateAdd(Index, Offset);
- Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
- }
+ auto *VT = dyn_cast<FixedVectorType>(V->getType());
+ if (VT && VT->getNumElements() > 4) {
+ // Split into stores of at most 4 elements.
+ Type *EltTy = VT->getElementType();
+ Value *Stride = ConstantInt::get(Builder.getInt32Ty(),
+ 4 * (DL.getTypeSizeInBits(EltTy) / 8));
- auto *Inst = Builder.CreateIntrinsic(Builder.getVoidTy(),
- Intrinsic::dx_resource_store_rawbuffer,
- {II->getOperand(0), Index, Offset, V});
- SI->replaceAllUsesWith(Inst);
+ SmallVector<int, 4> Indices;
+ for (unsigned int I = 0, N = VT->getNumElements(); I < N; I += 4) {
+ if (I > 0)
+ Offset = Builder.CreateAdd(Offset, Stride);
+
+ for (unsigned int J = I, E = std::min(N, J + 4); J < E; ++J)
+ Indices.push_back(J);
+ Value *Part = Builder.CreateShuffleVector(V, Indices);
+ emitRawStore(Builder, II->getOperand(0), Index, Offset, Part, RTI);
+
+ Indices.clear();
+ }
+ } else
+ emitRawStore(Builder, II->getOperand(0), Index, Offset, V, RTI);
}
static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI,
@@ -179,7 +204,7 @@ static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI,
return createTypedBufferStore(II, SI, RTI);
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
- return createRawStore(II, SI, RTI);
+ return createRawStores(II, SI, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -234,34 +259,64 @@ static void createTypedBufferLoad(IntrinsicInst *II, LoadInst *LI,
LI->replaceAllUsesWith(V);
}
-static void createRawLoad(IntrinsicInst *II, LoadInst *LI,
+static Value *emitRawLoad(IRBuilder<> &Builder, Type *Ty, Value *Buffer,
+ Value *Index, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
+ // For raw buffer (ie, HLSL's ByteAddressBuffer), we need to fold the access
+ // entirely into the index.
+ if (!RTI.isStruct()) {
+ auto *ConstantOffset = dyn_cast<ConstantInt>(Offset);
+ if (!ConstantOffset || !ConstantOffset->isZero())
+ Index = Builder.CreateAdd(Index, Offset);
+ Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
+ }
+
+ // The load intrinsic includes the bit for CheckAccessFullyMapped, so we need
+ // to add that to the return type.
+ Type *TypeWithCheck = StructType::get(Ty, Builder.getInt1Ty());
+ Value *V = Builder.CreateIntrinsic(TypeWithCheck,
+ Intrinsic::dx_resource_load_rawbuffer,
+ {Buffer, Index, Offset});
+ return Builder.CreateExtractValue(V, {0});
+}
+
+static void createRawLoads(IntrinsicInst *II, LoadInst *LI,
+ dxil::ResourceTypeInfo &RTI) {
const DataLayout &DL = LI->getDataLayout();
IRBuilder<> Builder(LI);
- Type *LoadType = StructType::get(LI->getType(), Builder.getInt1Ty());
- assert(!LI->getType()->isAggregateType() &&
- "Resource load should be scalar or vector type");
-
Value *Index = II->getOperand(1);
// The offset for the rawbuffer load and store ops is always in bytes.
uint64_t AccessSize = 1;
Value *Offset =
traverseGEPOffsets(DL, Builder, LI->getPointerOperand(), AccessSize);
- // For raw buffer (ie, HLSL's ByteAddressBuffer), we need to fold the access
- // entirely into the index.
- if (!RTI.isStruct()) {
- auto *ConstantOffset = dyn_cast<ConstantInt>(Offset);
- if (!ConstantOffset || !ConstantOffset->isZero())
- Index = Builder.CreateAdd(Index, Offset);
- Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
- }
+ // TODO: We could make this handle aggregates by walking the structure and
+ // handling each field individually, but we don't ever generate code that
+ // would hit that so it seems superfluous.
+ assert(!LI->getType()->isAggregateType() &&
+ "Resource load should be scalar or vector type");
- Value *V =
- Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_rawbuffer,
- {II->getOperand(0), Index, Offset});
- V = Builder.CreateExtractValue(V, {0});
+ Value *V;
+ if (auto *VT = dyn_cast<FixedVectorType>(LI->getType())) {
+ // Split into loads of at most 4 elements.
+ Type *EltTy = VT->getElementType();
+ Value *Stride = ConstantInt::get(Builder.getInt32Ty(),
+ 4 * (DL.getTypeSizeInBits(EltTy) / 8));
+
+ SmallVector<Value *> Parts;
+ for (unsigned int I = 0, N = VT->getNumElements(); I < N; I += 4) {
+ Type *Ty = FixedVectorType::get(EltTy, N - I < 4 ? N - I : 4);
+ if (I > 0)
+ Offset = Builder.CreateAdd(Offset, Stride);
+ Parts.push_back(
+ emitRawLoad(Builder, Ty, II->getOperand(0), Index, Offset, RTI));
+ }
+
+ V = Parts.size() > 1 ? concatenateVectors(Builder, Parts) : Parts[0];
+ } else
+ V = emitRawLoad(Builder, LI->getType(), II->getOperand(0), Index, Offset,
+ RTI);
LI->replaceAllUsesWith(V);
}
@@ -415,7 +470,7 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI,
return createTypedBufferLoad(II, LI, RTI);
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
- return createRawLoad(II, LI, RTI);
+ return createRawLoads(II, LI, RTI);
case dxil::ResourceKind::CBuffer:
return createCBufferLoad(II, LI, RTI);
case dxil::ResourceKind::Texture1D:
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-rawbuffer-wide.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-rawbuffer-wide.ll
new file mode 100644
index 0000000000000..4c767d93f08db
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-rawbuffer-wide.ll
@@ -0,0 +1,71 @@
+; RUN: opt -S -dxil-resource-access %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+declare void @v8f32_user(<8 x float>)
+declare void @v7f32_user(<7 x float>)
+
+; CHECK-LABEL: define void @loadfloat4x2_struct
+define void @loadfloat4x2_struct(i32 %index) {
+ %buffer = call target("dx.RawBuffer", [2 x <4 x float>], 1, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", [2 x <4 x float>], 1, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[LOAD1:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_a2v4f32_1_0t(target("dx.RawBuffer", [2 x <4 x float>], 1, 0) %buffer, i32 %index, i32 0)
+ ; CHECK: %[[VAL1:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD1]], 0
+ ; CHECK: %[[LOAD2:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_a2v4f32_1_0t(target("dx.RawBuffer", [2 x <4 x float>], 1, 0) %buffer, i32 %index, i32 16)
+ ; CHECK: %[[VAL2:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD2]], 0
+ ; CHECK: %[[MERGED:.*]] = shufflevector <4 x float> %[[VAL1]], <4 x float> %[[VAL2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ; CHECK: call void @v8f32_user(<8 x float> %[[MERGED]])
+ %data = load <8 x float>, ptr %ptr
+ call void @v8f32_user(<8 x float> %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @loadfloat4x2_byte
+define void @loadfloat4x2_byte(i32 %index) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", i8, 1, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[LOAD1:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 %index, i32 poison)
+ ; CHECK: %[[VAL1:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD1]], 0
+ ; CHECK: %[[NEXTINDEX:.*]] = add i32 %index, 16
+ ; CHECK: %[[LOAD2:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 %[[NEXTINDEX]], i32 poison)
+ ; CHECK: %[[VAL2:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD2]], 0
+ ; CHECK: %[[MERGED:.*]] = shufflevector <4 x float> %[[VAL1]], <4 x float> %[[VAL2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ; CHECK: call void @v8f32_user(<8 x float> %[[MERGED]])
+ %data = load <8 x float>, ptr %ptr
+ call void @v8f32_user(<8 x float> %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @loadfloat7
+define void @loadfloat7(i32 %index) {
+ %buffer = call target("dx.RawBuffer", <7 x float>, 1, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", <7 x float>, 1, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[LOAD1:.*]] = call { <4 x float>, i1 } @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_v7f32_1_0t(target("dx.RawBuffer", <7 x float>, 1, 0) %buffer, i32 %index, i32 0)
+ ; CHECK: %[[VAL1:.*]] = extractvalue { <4 x float>, i1 } %[[LOAD1]], 0
+ ; CHECK: %[[LOAD2:.*]] = call { <3 x float>, i1 } @llvm.dx.resource.load.rawbuffer.v3f32.tdx.RawBuffer_v7f32_1_0t(target("dx.RawBuffer", <7 x float>, 1, 0) %buffer, i32 %index, i32 16)
+ ; CHECK: %[[VAL2:.*]] = extractvalue { <3 x float>, i1 } %[[LOAD2]], 0
+ ; CHECK: %[[TMP:.*]] = shufflevector <3 x float> %[[VAL2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+ ; CHECK: %[[MERGED:.*]] = shufflevector <4 x float> %[[VAL1]], <4 x float> %[[TMP]], <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+ ; CHECK: call void @v7f32_user(<7 x float> %[[MERGED]])
+ %data = load <7 x float>, ptr %ptr
+ call void @v7f32_user(<7 x float> %data)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/store-rawbuffer-wide.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/store-rawbuffer-wide.ll
new file mode 100644
index 0000000000000..097db741331c5
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/store-rawbuffer-wide.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -dxil-resource-access %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+; CHECK-LABEL: define void @storefloat4x2_struct
+define void @storefloat4x2_struct(i32 %index, <8 x float> %data) {
+ %buffer = call target("dx.RawBuffer", [2 x <4 x float>], 1, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", [2 x <4 x float>], 1, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[DATA1:.*]] = shufflevector <8 x float> %data, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_a2v4f32_1_0t.v4f32(target("dx.RawBuffer", [2 x <4 x float>], 1, 0) %buffer, i32 %index, i32 0, <4 x float> %[[DATA1]])
+ ; CHECK: %[[DATA2:.*]] = shufflevector <8 x float> %data, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_a2v4f32_1_0t.v4f32(target("dx.RawBuffer", [2 x <4 x float>], 1, 0) %buffer, i32 %index, i32 16, <4 x float> %[[DATA2]])
+ store <8 x float> %data, ptr %ptr
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storefloat4x2_byte
+define void @storefloat4x2_byte(i32 %index, <8 x float> %data) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", i8, 1, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[DATA1:.*]] = shufflevector <8 x float> %data, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0t.v4f32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 %index, i32 poison, <4 x float> %[[DATA1]])
+ ; CHECK: %[[DATA2:.*]] = shufflevector <8 x float> %data, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ; CHECK: %[[NEXTINDEX:.*]] = add i32 %index, 16
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0t.v4f32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 %[[NEXTINDEX]], i32 poison, <4 x float> %[[DATA2]])
+ store <8 x float> %data, ptr %ptr
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storev7f32
+define void @storev7f32(i32 %index, <7 x float> %data) {
+ %buffer = call target("dx.RawBuffer", <7 x float>, 1, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.RawBuffer", <7 x float>, 1, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[DATA1:.*]] = shufflevector <7 x float> %data, <7 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v7f32_1_0t.v4f32(target("dx.RawBuffer", <7 x float>, 1, 0) %buffer, i32 %index, i32 0, <4 x float> %[[DATA1]])
+ ; CHECK: %[[DATA2:.*]] = shufflevector <7 x float> %data, <7 x float> poison, <3 x i32> <i32 4, i32 5, i32 6>
+ ; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v7f32_1_0t.v3f32(target("dx.RawBuffer", <7 x float>, 1, 0) %buffer, i32 %index, i32 16, <3 x float> %[[DATA2]])
+ store <7 x float> %data, ptr %ptr
+
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/184732
More information about the llvm-commits
mailing list