[llvm] 90e8411 - [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 10 10:34:01 PDT 2024
Author: Justin Bogner
Date: 2024-09-10T10:33:57-07:00
New Revision: 90e841131aa82560b6c7d1c1840bdfc79c091b37
URL: https://github.com/llvm/llvm-project/commit/90e841131aa82560b6c7d1c1840bdfc79c091b37
DIFF: https://github.com/llvm/llvm-project/commit/90e841131aa82560b6c7d1c1840bdfc79c091b37.diff
LOG: [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops
The `@llvm.dx.typedBufferStore` intrinsic is lowered to `@dx.op.bufferStore`.
Pull Request: https://github.com/llvm/llvm-project/pull/104253
Added:
llvm/test/CodeGen/DirectX/BufferStore-errors.ll
llvm/test/CodeGen/DirectX/BufferStore.ll
Modified:
llvm/docs/DirectX/DXILResources.rst
llvm/include/llvm/IR/IntrinsicsDirectX.td
llvm/lib/Target/DirectX/DXIL.td
llvm/lib/Target/DirectX/DXILOpLowering.cpp
Removed:
################################################################################
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index 8e43bfaaaf32ea..a982c3a29fcc3b 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -361,11 +361,60 @@ Examples:
- ``i32``
- Index into the buffer
+Texture and Typed Buffer Stores
+-------------------------------
+
+*relevant types: Textures and TypedBuffer*
+
+The `TextureStore`_ and `BufferStore`_ DXIL operations always write all four
+32-bit components to a texture or a typed buffer. While both operations include
+a mask parameter, it is specified that the mask must cover all components when
+used with these types.
+
+The store operations that we define as intrinsics behave similarly, and will
+only accept writes to the whole of the contained type. This differs from the
+loads above, but this makes sense to do from a semantics preserving point of
+view. Thus, texture and buffer stores may only operate on 4-element vectors of
+types that are 32-bits or fewer, such as ``<4 x i32>``, ``<4 x float>``, and
+``<4 x half>``, and 2 element vectors of 64-bit types like ``<2 x double>`` and
+``<2 x i64>``.
+
+.. _BufferStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#bufferstore
+.. _TextureStore: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#texturestore
+
Examples:
-.. code-block:: llvm
+.. list-table:: ``@llvm.dx.typedBufferStore``
+ :header-rows: 1
- %ret = call {<4 x float>, i1}
- @llvm.dx.typedBufferLoad.checkbit.v4f32.tdx.TypedBuffer_v4f32_0_0_0t(
- target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index)
+ * - Argument
+ -
+ - Type
+ - Description
+ * - Return value
+ -
+ - ``void``
+ -
+ * - ``%buffer``
+ - 0
+ - ``target(dx.TypedBuffer, ...)``
+ - The buffer to store into
+ * - ``%index``
+ - 1
+ - ``i32``
+ - Index into the buffer
+ * - ``%data``
+ - 2
+ - A 4- or 2-element vector of the type of the buffer
+ - The data to store
+
+Examples:
+
+.. code-block:: llvm
+ call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f32_1_0_0t(
+ target("dx.TypedBuffer", f32, 1, 0) %buf, i32 %index, <4 x f32> %data)
+ call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f16_1_0_0t(
+ target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
+ call void @llvm.dx.typedBufferStore.tdx.Buffer_v2f64_1_0_0t(
+ target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 40c9ac3f0da346..c36e98f040ab81 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -32,6 +32,8 @@ def int_dx_handle_fromBinding
def int_dx_typedBufferLoad
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
+def int_dx_typedBufferStore
+ : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
// Cast between target extension handle types and dxil-style opaque handles
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 67a9b9d02bb6a1..759a58ed3930e3 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -707,6 +707,18 @@ def BufferLoad : DXILOp<68, bufferLoad> {
let stages = [Stages<DXIL1_0, [all_stages]>];
}
+def BufferStore : DXILOp<69, bufferStore> {
+ let Doc = "writes to an RWTypedBuffer";
+ // Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask
+ let arguments = [
+ HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy,
+ Int8Ty
+ ];
+ let result = VoidTy;
+ let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, Int16Ty, Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def ThreadId : DXILOp<93, threadId> {
let Doc = "Reads the thread ID";
let LLVMIntrinsic = int_dx_thread_id;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index df2751d99576a8..f968cab1dccf1e 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -82,8 +82,11 @@ class OpLowerer {
public:
OpLowerer(Module &M, DXILResourceMap &DRM) : M(M), OpBuilder(M), DRM(DRM) {}
- void replaceFunction(Function &F,
- llvm::function_ref<Error(CallInst *CI)> ReplaceCall) {
+ /// Replace every call to \c F using \c ReplaceCall, and then erase \c F. If
+ /// there is an error replacing a call, we emit a diagnostic and return true.
+ [[nodiscard]] bool
+ replaceFunction(Function &F,
+ llvm::function_ref<Error(CallInst *CI)> ReplaceCall) {
for (User *U : make_early_inc_range(F.users())) {
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
@@ -94,16 +97,18 @@ class OpLowerer {
DiagnosticInfoUnsupported Diag(*CI->getFunction(), Message,
CI->getDebugLoc());
M.getContext().diagnose(Diag);
- continue;
+ return true;
}
}
if (F.user_empty())
F.eraseFromParent();
+ return false;
}
- void replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) {
+ [[nodiscard]]
+ bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) {
bool IsVectorArgExpansion = isVectorArgExpansion(F);
- replaceFunction(F, [&](CallInst *CI) -> Error {
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
SmallVector<Value *> Args;
OpBuilder.getIRB().SetInsertPoint(CI);
if (IsVectorArgExpansion) {
@@ -175,12 +180,12 @@ class OpLowerer {
CleanupCasts.clear();
}
- void lowerToCreateHandle(Function &F) {
+ [[nodiscard]] bool lowerToCreateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
Type *Int32Ty = IRB.getInt32Ty();
- replaceFunction(F, [&](CallInst *CI) -> Error {
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
IRB.SetInsertPoint(CI);
auto *It = DRM.find(CI);
@@ -205,10 +210,10 @@ class OpLowerer {
});
}
- void lowerToBindAndAnnotateHandle(Function &F) {
+ [[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
- replaceFunction(F, [&](CallInst *CI) -> Error {
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
IRB.SetInsertPoint(CI);
auto *It = DRM.find(CI);
@@ -251,12 +256,11 @@ class OpLowerer {
/// Lower `dx.handle.fromBinding` intrinsics depending on the shader model and
/// taking into account binding information from DXILResourceAnalysis.
- void lowerHandleFromBinding(Function &F) {
+ bool lowerHandleFromBinding(Function &F) {
Triple TT(Triple(M.getTargetTriple()));
if (TT.getDXILVersion() < VersionTuple(1, 6))
- lowerToCreateHandle(F);
- else
- lowerToBindAndAnnotateHandle(F);
+ return lowerToCreateHandle(F);
+ return lowerToBindAndAnnotateHandle(F);
}
/// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
@@ -342,11 +346,11 @@ class OpLowerer {
return Error::success();
}
- void lowerTypedBufferLoad(Function &F) {
+ [[nodiscard]] bool lowerTypedBufferLoad(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int32Ty = IRB.getInt32Ty();
- replaceFunction(F, [&](CallInst *CI) -> Error {
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
IRB.SetInsertPoint(CI);
Value *Handle =
@@ -368,8 +372,51 @@ class OpLowerer {
});
}
+ [[nodiscard]] bool lowerTypedBufferStore(Function &F) {
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ Type *Int8Ty = IRB.getInt8Ty();
+ Type *Int32Ty = IRB.getInt32Ty();
+
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
+ IRB.SetInsertPoint(CI);
+
+ Value *Handle =
+ createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+ Value *Index0 = CI->getArgOperand(1);
+ Value *Index1 = UndefValue::get(Int32Ty);
+ // For typed stores, the mask must always cover all four elements.
+ Constant *Mask = ConstantInt::get(Int8Ty, 0xF);
+
+ Value *Data = CI->getArgOperand(2);
+ auto *DataTy = dyn_cast<FixedVectorType>(Data->getType());
+ if (!DataTy || DataTy->getNumElements() != 4)
+ return make_error<StringError>(
+ "typedBufferStore data must be a vector of 4 elements",
+ inconvertibleErrorCode());
+ Value *Data0 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 0));
+ Value *Data1 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 1));
+ Value *Data2 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 2));
+ Value *Data3 =
+ IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, 3));
+
+ std::array<Value *, 8> Args{Handle, Index0, Index1, Data0,
+ Data1, Data2, Data3, Mask};
+ Expected<CallInst *> OpCall =
+ OpBuilder.tryCreateOp(OpCode::BufferStore, Args);
+ if (Error E = OpCall.takeError())
+ return E;
+
+ CI->eraseFromParent();
+ return Error::success();
+ });
+ }
+
bool lowerIntrinsics() {
bool Updated = false;
+ bool HasErrors = false;
for (Function &F : make_early_inc_range(M.functions())) {
if (!F.isDeclaration())
@@ -380,19 +427,22 @@ class OpLowerer {
continue;
#define DXIL_OP_INTRINSIC(OpCode, Intrin) \
case Intrin: \
- replaceFunctionWithOp(F, OpCode); \
+ HasErrors |= replaceFunctionWithOp(F, OpCode); \
break;
#include "DXILOperation.inc"
case Intrinsic::dx_handle_fromBinding:
- lowerHandleFromBinding(F);
+ HasErrors |= lowerHandleFromBinding(F);
break;
case Intrinsic::dx_typedBufferLoad:
- lowerTypedBufferLoad(F);
+ HasErrors |= lowerTypedBufferLoad(F);
+ break;
+ case Intrinsic::dx_typedBufferStore:
+ HasErrors |= lowerTypedBufferStore(F);
break;
}
Updated = true;
}
- if (Updated)
+ if (Updated && !HasErrors)
cleanupHandleCasts();
return Updated;
diff --git a/llvm/test/CodeGen/DirectX/BufferStore-errors.ll b/llvm/test/CodeGen/DirectX/BufferStore-errors.ll
new file mode 100644
index 00000000000000..b00fc38c901f92
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStore-errors.ll
@@ -0,0 +1,37 @@
+; We use llc for this test so that we don't abort after the first error.
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+; CHECK: error:
+; CHECK-SAME: in function storetoomany
+; CHECK-SAME: typedBufferStore data must be a vector of 4 elements
+define void @storetoomany(<5 x float> %data, i32 %index) {
+ %buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v5f32(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
+ i32 %index, <5 x float> %data)
+
+ ret void
+}
+
+; CHECK: error:
+; CHECK-SAME: in function storetoofew
+; CHECK-SAME: typedBufferStore data must be a vector of 4 elements
+define void @storetoofew(<3 x i32> %data, i32 %index) {
+ %buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4i32_1_0_0t.v3i32(
+ target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
+ i32 %index, <3 x i32> %data)
+
+ ret void
+}
+
+declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v5f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <5 x float>)
+declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4i32_1_0_0t.v3i32(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), i32, <3 x i32>)
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
new file mode 100644
index 00000000000000..4aebbe155dc995
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -0,0 +1,92 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storefloat(<4 x float> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.cast_handle
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
+ i32 %index, <4 x float> %data)
+
+ ret void
+}
+
+define void @storeint(<4 x i32> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i32> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
+ i32 %index, <4 x i32> %data)
+
+ ret void
+}
+
+define void @storehalf(<4 x half> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x half>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.cast_handle
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x half> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer,
+ i32 %index, <4 x half> %data)
+
+ ret void
+}
+
+define void @storei16(<4 x i16> %data, i32 %index) {
+
+ ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
+ ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
+ %buffer = call target("dx.TypedBuffer", <4 x i16>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i16_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.cast_handle
+
+ ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i16> %data, i32 0
+ ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1
+ ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2
+ ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer,
+ i32 %index, <4 x i16> %data)
+
+ ret void
+}
More information about the llvm-commits
mailing list