[llvm] 0e51b54 - [DirectX] Implement the resource.store.rawbuffer intrinsic (#121282)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 12 17:52:24 PST 2025
Author: Justin Bogner
Date: 2025-01-12T18:52:20-07:00
New Revision: 0e51b54b7ac02b0920e20b8ccae26b32bd6b6982
URL: https://github.com/llvm/llvm-project/commit/0e51b54b7ac02b0920e20b8ccae26b32bd6b6982
DIFF: https://github.com/llvm/llvm-project/commit/0e51b54b7ac02b0920e20b8ccae26b32bd6b6982.diff
LOG: [DirectX] Implement the resource.store.rawbuffer intrinsic (#121282)
This introduces `@llvm.dx.resource.store.rawbuffer` and generalizes the
buffer store docs under DirectX/DXILResources.
Fixes #106188
Added:
llvm/test/CodeGen/DirectX/BufferStore-sm61.ll
llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll
llvm/test/CodeGen/DirectX/RawBufferStore.ll
Modified:
llvm/docs/DirectX/DXILResources.rst
llvm/include/llvm/IR/IntrinsicsDirectX.td
llvm/lib/Target/DirectX/DXIL.td
llvm/lib/Target/DirectX/DXILOpLowering.cpp
Removed:
################################################################################
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index 857d29e48363b0..80e3c2c11153d4 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -491,26 +491,28 @@ Examples:
i32 %byte_offset,
i32 0)
-Texture and Typed Buffer Stores
--------------------------------
+Stores
+------
-*relevant types: Textures and TypedBuffer*
+*relevant types: Textures and Buffer*
-The `TextureStore`_ and `BufferStore`_ DXIL operations always write all four
-32-bit components to a texture or a typed buffer. While both operations include
-a mask parameter, it is specified that the mask must cover all components when
-used with these types.
+The `TextureStore`_, `BufferStore`_, and `RawBufferStore`_ DXIL operations
+write four components to a texture or a buffer. These include a mask argument
+that is used when fewer than 4 components are written, but notably this only
+takes on the contiguous x, xy, xyz, and xyzw values.
-The store operations that we define as intrinsics behave similarly, and will
-only accept writes to the whole of the contained type. This differs from the
-loads above, but this makes sense to do from a semantics preserving point of
-view. Thus, texture and buffer stores may only operate on 4-element vectors of
-types that are 32-bits or fewer, such as ``<4 x i32>``, ``<4 x float>``, and
-``<4 x half>``, and 2 element vectors of 64-bit types like ``<2 x double>`` and
-``<2 x i64>``.
+We define the LLVM store intrinsics to accept vectors when storing multiple
+components rather than using `undef` and a mask, but otherwise match the DXIL
+ops fairly closely.
-.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
.. _TextureStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#texturestore
+.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
+.. _RawBufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferstore
+
+For TypedBuffer, we only need one coordinate, and we must always write a vector
+since partial writes aren't possible. Similarly to the load operations
+described above, we handle 64-bit types specially and only handle 2-element
+vectors rather than 4.
Examples:
@@ -548,3 +550,85 @@ Examples:
target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
call void @llvm.dx.resource.store.typedbuffer.tdx.Buffer_v2f64_1_0_0t(
target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
+
+For RawBuffer, we need two indices and we accept scalars and vectors of 4 or
+fewer elements. Note that we do allow vectors of 4 64-bit elements here.
+
+Examples:
+
+.. list-table:: ``@llvm.dx.resource.store.rawbuffer``
+ :header-rows: 1
+
+ * - Argument
+ -
+ - Type
+ - Description
+ * - Return value
+ -
+ - ``void``
+ -
+ * - ``%buffer``
+ - 0
+ - ``target(dx.RawBuffer, ...)``
+ - The buffer to store into
+ * - ``%index``
+ - 1
+ - ``i32``
+ - Index into the buffer
+ * - ``%offset``
+ - 2
+ - ``i32``
+ - Byte offset into structured buffer elements
+ * - ``%data``
+ - 3
+ - Scalar or vector
+ - The data to store
+
+Examples:
+
+.. code-block:: llvm
+
+ ; float
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f32_1_0_0t.f32(
+ target("dx.RawBuffer", float, 1, 0, 0) %buffer,
+ i32 %index, i32 0, float %data)
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.f32(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %index, i32 0, float %data)
+
+ ; float4
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v4f32_1_0_0t.v4f32(
+ target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data)
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f32(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data)
+
+ ; struct S0 { float4 f; int4 i; }
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data0)
+ call void @llvm.dx.resource.store.rawbuffer.v4i32(
+ target("dx.RawBuffer", { <4 x float>, <4 x i32> }, 1, 0, 0) %buffer,
+ i32 %index, i32 16, <4 x i32> %data1)
+
+ ; struct Q { float4 f; int3 i; }
+ ; struct R { int z; S x; }
+ call void @llvm.dx.resource.store.rawbuffer.i32(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+ %buffer,
+ i32 %index, i32 0, i32 %data0)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+ %buffer,
+ i32 %index, i32 4, <4 x float> %data1)
+ call void @llvm.dx.resource.store.rawbuffer.v3f16(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+ %buffer,
+ i32 %index, i32 20, <3 x half> %data2)
+
+ ; byteaddressbuf.Store<int64_t4>
+ call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i8_1_0_0t.v4f64(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x double> %data)
+
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index ef48af5b42dbf1..2a56ba78ce88e8 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -40,6 +40,10 @@ def int_dx_resource_load_rawbuffer
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
[llvm_any_ty, llvm_i32_ty, llvm_i32_ty],
[IntrReadMem]>;
+def int_dx_resource_store_rawbuffer
+ : DefaultAttrsIntrinsic<
+ [], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_any_ty],
+ [IntrWriteMem]>;
def int_dx_resource_updatecounter
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 62b5b704e99eb2..6fdd83c4dc8778 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -909,6 +909,26 @@ def RawBufferLoad : DXILOp<139, rawBufferLoad> {
let stages = [Stages<DXIL1_2, [all_stages]>];
}
+def RawBufferStore : DXILOp<140, rawBufferStore> {
+ let Doc = "writes to a RWByteAddressBuffer or RWStructuredBuffer";
+ // Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask, Alignment
+ let arguments = [
+ HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy,
+ Int8Ty, Int32Ty
+ ];
+ let result = VoidTy;
+ let overloads = [
+ Overloads<DXIL1_2,
+ [ResRetHalfTy, ResRetFloatTy, ResRetInt16Ty, ResRetInt32Ty]>,
+ Overloads<DXIL1_3,
+ [
+ ResRetHalfTy, ResRetFloatTy, ResRetDoubleTy, ResRetInt16Ty,
+ ResRetInt32Ty, ResRetInt64Ty
+ ]>
+ ];
+ let stages = [Stages<DXIL1_2, [all_stages]>];
+}
+
def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
"accumulate to i32";
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index f43815bf211664..0c245c1a43d31c 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -616,7 +616,10 @@ class OpLowerer {
return false;
}
- [[nodiscard]] bool lowerTypedBufferStore(Function &F) {
+ [[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) {
+ Triple TT(Triple(M.getTargetTriple()));
+ VersionTuple DXILVersion = TT.getDXILVersion();
+ const DataLayout &DL = F.getDataLayout();
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
Type *Int32Ty = IRB.getInt32Ty();
@@ -627,51 +630,75 @@ class OpLowerer {
Value *Handle =
createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
Value *Index0 = CI->getArgOperand(1);
- Value *Index1 = UndefValue::get(Int32Ty);
- // For typed stores, the mask must always cover all four elements.
- Constant *Mask = ConstantInt::get(Int8Ty, 0xF);
+ Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty);
+
+ Value *Data = CI->getArgOperand(IsRaw ? 3 : 2);
+ Type *DataTy = Data->getType();
+ Type *ScalarTy = DataTy->getScalarType();
- Value *Data = CI->getArgOperand(2);
- auto *DataTy = dyn_cast<FixedVectorType>(Data->getType());
- if (!DataTy || DataTy->getNumElements() != 4)
+ uint64_t NumElements =
+ DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy);
+ Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements));
+
+ // TODO: check that we only have vector or scalar...
+ if (!IsRaw && NumElements != 4)
return make_error<StringError>(
"typedBufferStore data must be a vector of 4 elements",
inconvertibleErrorCode());
+ else if (NumElements > 4)
+ return make_error<StringError>(
+ "rawBufferStore data must have at most 4 elements",
+ inconvertibleErrorCode());
- // Since we're post-scalarizer, we likely have a vector that's constructed
- // solely for the argument of the store. If so, just use the scalar values
- // from before they're inserted into the temporary.
std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr};
- auto *IEI = dyn_cast<InsertElementInst>(Data);
- while (IEI) {
- auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
- if (!IndexOp)
- break;
- size_t IndexVal = IndexOp->getZExtValue();
- assert(IndexVal < 4 && "Too many elements for buffer store");
- DataElements[IndexVal] = IEI->getOperand(1);
- IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
+ if (DataTy == ScalarTy)
+ DataElements[0] = Data;
+ else {
+ // Since we're post-scalarizer, if we see a vector here it's likely
+ // constructed solely for the argument of the store. Just use the scalar
+ // values from before they're inserted into the temporary.
+ auto *IEI = dyn_cast<InsertElementInst>(Data);
+ while (IEI) {
+ auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2));
+ if (!IndexOp)
+ break;
+ size_t IndexVal = IndexOp->getZExtValue();
+ assert(IndexVal < 4 && "Too many elements for buffer store");
+ DataElements[IndexVal] = IEI->getOperand(1);
+ IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
+ }
}
// If for some reason we weren't able to forward the arguments from the
- // scalarizer artifact, then we need to actually extract elements from the
- // vector.
- for (int I = 0, E = 4; I != E; ++I)
+ // scalarizer artifact, then we may need to actually extract elements from
+ // the vector.
+ for (int I = 0, E = NumElements; I < E; ++I)
if (DataElements[I] == nullptr)
DataElements[I] =
IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));
+ // For any elements beyond the length of the vector, fill up with undef.
+ for (int I = NumElements, E = 4; I < E; ++I)
+ if (DataElements[I] == nullptr)
+ DataElements[I] = UndefValue::get(ScalarTy);
- std::array<Value *, 8> Args{
+ dxil::OpCode Op = OpCode::BufferStore;
+ SmallVector<Value *, 9> Args{
Handle, Index0, Index1, DataElements[0],
DataElements[1], DataElements[2], DataElements[3], Mask};
+ if (IsRaw && DXILVersion >= VersionTuple(1, 2)) {
+ Op = OpCode::RawBufferStore;
+ // RawBufferStore requires the alignment
+ Args.push_back(
+ ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()));
+ }
Expected<CallInst *> OpCall =
- OpBuilder.tryCreateOp(OpCode::BufferStore, Args, CI->getName());
+ OpBuilder.tryCreateOp(Op, Args, CI->getName());
if (Error E = OpCall.takeError())
return E;
CI->eraseFromParent();
// Clean up any leftover `insertelement`s
- IEI = dyn_cast<InsertElementInst>(Data);
+ auto *IEI = dyn_cast<InsertElementInst>(Data);
while (IEI && IEI->use_empty()) {
InsertElementInst *Tmp = IEI;
IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0));
@@ -776,11 +803,14 @@ class OpLowerer {
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
break;
case Intrinsic::dx_resource_store_typedbuffer:
- HasErrors |= lowerTypedBufferStore(F);
+ HasErrors |= lowerBufferStore(F, /*IsRaw=*/false);
break;
case Intrinsic::dx_resource_load_rawbuffer:
HasErrors |= lowerRawBufferLoad(F);
break;
+ case Intrinsic::dx_resource_store_rawbuffer:
+ HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
+ break;
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
diff --git a/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll b/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll
new file mode 100644
index 00000000000000..1916cdf3744555
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStore-sm61.ll
@@ -0,0 +1,126 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+; Before SM6.2 ByteAddressBuffer and StructuredBuffer lower to bufferStore.
+
+target triple = "dxil-pc-shadermodel6.1-compute"
+
+; CHECK-LABEL: define void @storef32_struct
+define void @storef32_struct(i32 %index, float %data) {
+ %buffer = call target("dx.RawBuffer", float, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float %data, float undef, float undef, float undef, i8 1)
+ call void @llvm.dx.resource.store.rawbuffer.f32(
+ target("dx.RawBuffer", float, 1, 0, 0) %buffer,
+ i32 %index, i32 0, float %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storef32_byte
+define void @storef32_byte(i32 %offset, float %data) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, float %data, float undef, float undef, float undef, i8 1)
+ call void @llvm.dx.resource.store.rawbuffer.f32(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %offset, i32 0, float %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storev4f32_struct
+define void @storev4f32_struct(i32 %index, <4 x float> %data) {
+ %buffer = call target("dx.RawBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f32_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0
+ ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1
+ ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2
+ ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storev4f32_byte
+define void @storev4f32_byte(i32 %offset, <4 x float> %data) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0
+ ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1
+ ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2
+ ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %offset, i32 0, <4 x float> %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storeelements
+define void @storeelements(i32 %index, <4 x float> %data0, <4 x i32> %data1) {
+ %buffer = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data0, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data0, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data0, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data0, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data0)
+
+ ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x i32> %data1, i32 0
+ ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x i32> %data1, i32 1
+ ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x i32> %data1, i32 2
+ ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x i32> %data1, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 16, i32 [[DATA1_0]], i32 [[DATA1_1]], i32 [[DATA1_2]], i32 [[DATA1_3]], i8 15)
+ call void @llvm.dx.resource.store.rawbuffer.v4i32(
+ target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer,
+ i32 %index, i32 16, <4 x i32> %data1)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storenested
+define void @storenested(i32 %index, i32 %data0, <4 x float> %data1, <3 x half> %data2) {
+ %buffer = call
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i32 %data0, i32 undef, i32 undef, i32 undef, i8 1)
+ call void @llvm.dx.resource.store.rawbuffer.i32(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer,
+ i32 %index, i32 0, i32 %data0)
+
+ ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x float> %data1, i32 0
+ ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x float> %data1, i32 1
+ ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x float> %data1, i32 2
+ ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x float> %data1, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 4, float [[DATA1_0]], float [[DATA1_1]], float [[DATA1_2]], float [[DATA1_3]], i8 15)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer,
+ i32 %index, i32 4, <4 x float> %data1)
+
+ ; CHECK: [[DATA2_0:%.*]] = extractelement <3 x half> %data2, i32 0
+ ; CHECK: [[DATA2_1:%.*]] = extractelement <3 x half> %data2, i32 1
+ ; CHECK: [[DATA2_2:%.*]] = extractelement <3 x half> %data2, i32 2
+ ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %{{.*}}, i32 %index, i32 20, half [[DATA2_0]], half [[DATA2_1]], half [[DATA2_2]], half undef, i8 7)
+ call void @llvm.dx.resource.store.rawbuffer.v3f16(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer,
+ i32 %index, i32 20, <3 x half> %data2)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll
new file mode 100644
index 00000000000000..a883a0bbc29fdf
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/RawBufferStore-error64.ll
@@ -0,0 +1,20 @@
+; We use llc for this test so that we don't abort after the first error.
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.2-compute"
+
+; Can't store 64 bit types directly until SM6.3 (byteaddressbuf.Store<int64_t4>)
+; CHECK: error:
+; CHECK-SAME: in function storev4f64_byte
+; CHECK-SAME: Cannot create RawBufferStore operation: Invalid overload type
+define void @storev4f64_byte(i32 %offset, <4 x double> %data) "hlsl.export" {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ call void @llvm.dx.resource.store.rawbuffer.v4i64(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %offset, i32 0, <4 x double> %data)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/RawBufferStore.ll b/llvm/test/CodeGen/DirectX/RawBufferStore.ll
new file mode 100644
index 00000000000000..96824d5ee5a4af
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/RawBufferStore.ll
@@ -0,0 +1,144 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+; CHECK-LABEL: define void @storef32_struct
+define void @storef32_struct(i32 %index, float %data) {
+ %buffer = call target("dx.RawBuffer", float, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 0, float %data, float undef, float undef, float undef, i8 1, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.f32(
+ target("dx.RawBuffer", float, 1, 0, 0) %buffer,
+ i32 %index, i32 0, float %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storef32_byte
+define void @storef32_byte(i32 %offset, float %data) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buffer_annot, i32 %offset, i32 0, float %data, float undef, float undef, float undef, i8 1, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.f32(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %offset, i32 0, float %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storev4f32_struct
+define void @storev4f32_struct(i32 %index, <4 x float> %data) {
+ %buffer = call target("dx.RawBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f32_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0
+ ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1
+ ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2
+ ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3
+ ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", <4 x float>, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storev4f32_byte
+define void @storev4f32_byte(i32 %offset, <4 x float> %data) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0:%.*]] = extractelement <4 x float> %data, i32 0
+ ; CHECK: [[DATA1:%.*]] = extractelement <4 x float> %data, i32 1
+ ; CHECK: [[DATA2:%.*]] = extractelement <4 x float> %data, i32 2
+ ; CHECK: [[DATA3:%.*]] = extractelement <4 x float> %data, i32 3
+ ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buffer_annot, i32 %offset, i32 0, float [[DATA0]], float [[DATA1]], float [[DATA2]], float [[DATA3]], i8 15, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %offset, i32 0, <4 x float> %data)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storeelements
+define void @storeelements(i32 %index, <4 x float> %data0, <4 x i32> %data1) {
+ %buffer = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data0, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data0, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data0, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data0, i32 3
+ ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 0, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer,
+ i32 %index, i32 0, <4 x float> %data0)
+
+ ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x i32> %data1, i32 0
+ ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x i32> %data1, i32 1
+ ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x i32> %data1, i32 2
+ ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x i32> %data1, i32 3
+ ; CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 16, i32 [[DATA1_0]], i32 [[DATA1_1]], i32 [[DATA1_2]], i32 [[DATA1_3]], i8 15, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.v4i32(
+ target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 1, 0, 0) %buffer,
+ i32 %index, i32 16, <4 x i32> %data1)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @storenested
+define void @storenested(i32 %index, i32 %data0, <4 x float> %data1, <3 x half> %data2) {
+ %buffer = call
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 0, i32 %data0, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.i32(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer,
+ i32 %index, i32 0, i32 %data0)
+
+ ; CHECK: [[DATA1_0:%.*]] = extractelement <4 x float> %data1, i32 0
+ ; CHECK: [[DATA1_1:%.*]] = extractelement <4 x float> %data1, i32 1
+ ; CHECK: [[DATA1_2:%.*]] = extractelement <4 x float> %data1, i32 2
+ ; CHECK: [[DATA1_3:%.*]] = extractelement <4 x float> %data1, i32 3
+ ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 4, float [[DATA1_0]], float [[DATA1_1]], float [[DATA1_2]], float [[DATA1_3]], i8 15, i32 4)
+ call void @llvm.dx.resource.store.rawbuffer.v4f32(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer,
+ i32 %index, i32 4, <4 x float> %data1)
+
+ ; CHECK: [[DATA2_0:%.*]] = extractelement <3 x half> %data2, i32 0
+ ; CHECK: [[DATA2_1:%.*]] = extractelement <3 x half> %data2, i32 1
+ ; CHECK: [[DATA2_2:%.*]] = extractelement <3 x half> %data2, i32 2
+ ; CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %buffer_annot, i32 %index, i32 20, half [[DATA2_0]], half [[DATA2_1]], half [[DATA2_2]], half undef, i8 7, i32 2)
+ call void @llvm.dx.resource.store.rawbuffer.v3f16(
+ target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 1, 0, 0) %buffer,
+ i32 %index, i32 20, <3 x half> %data2)
+
+ ret void
+}
+
+; byteaddressbuf.Store<int64_t4>
+; CHECK-LABEL: define void @storev4f64_byte
+define void @storev4f64_byte(i32 %offset, <4 x double> %data) {
+ %buffer = call target("dx.RawBuffer", i8, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0:%.*]] = extractelement <4 x double> %data, i32 0
+ ; CHECK: [[DATA1:%.*]] = extractelement <4 x double> %data, i32 1
+ ; CHECK: [[DATA2:%.*]] = extractelement <4 x double> %data, i32 2
+ ; CHECK: [[DATA3:%.*]] = extractelement <4 x double> %data, i32 3
+ ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle %buffer_annot, i32 %offset, i32 0, double [[DATA0]], double [[DATA1]], double [[DATA2]], double [[DATA3]], i8 15, i32 8)
+ call void @llvm.dx.resource.store.rawbuffer.v4i64(
+ target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
+ i32 %offset, i32 0, <4 x double> %data)
+
+ ret void
+}
More information about the llvm-commits
mailing list