[llvm-branch-commits] [llvm] [DirectX] Introduce the DXILResourceAccess pass (PR #116726)
Justin Bogner via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Dec 12 14:49:45 PST 2024
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/116726
>From 54de63603f27a0a350a80d2509ab3cf4bc26fb05 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Sun, 3 Nov 2024 14:28:39 -0800
Subject: [PATCH 1/6] [DirectX] Introduce the DXILResourceAccess pass
This pass transforms resource access via `llvm.dx.resource.getpointer`
into buffer loads and stores.
Fixes #114848.
---
llvm/lib/Target/DirectX/CMakeLists.txt | 1 +
.../lib/Target/DirectX/DXILResourceAccess.cpp | 196 ++++++++++++++++++
llvm/lib/Target/DirectX/DXILResourceAccess.h | 28 +++
llvm/lib/Target/DirectX/DirectX.h | 7 +
.../Target/DirectX/DirectXPassRegistry.def | 6 +
.../Target/DirectX/DirectXTargetMachine.cpp | 5 +-
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 3 +
.../ResourceAccess/load_typedbuffer.ll | 35 ++++
.../ResourceAccess/store_typedbuffer.ll | 103 +++++++++
llvm/test/CodeGen/DirectX/llc-pipeline.ll | 2 +
10 files changed, 385 insertions(+), 1 deletion(-)
create mode 100644 llvm/lib/Target/DirectX/DXILResourceAccess.cpp
create mode 100644 llvm/lib/Target/DirectX/DXILResourceAccess.h
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
create mode 100644 llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index a726071e0dcecd..26315db891b577 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_target(DirectXCodeGen
DXILPrettyPrinter.cpp
DXILResource.cpp
DXILResourceAnalysis.cpp
+ DXILResourceAccess.cpp
DXILShaderFlags.cpp
DXILTranslateMetadata.cpp
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
new file mode 100644
index 00000000000000..f9b28800b74909
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -0,0 +1,196 @@
+//===- DXILResourceAccess.cpp - Resource access via load/store ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILResourceAccess.h"
+#include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "dxil-resource-access"
+
+using namespace llvm;
+
+static void replaceTypedBufferAccess(IntrinsicInst *II,
+ dxil::ResourceInfo &RI) {
+ const DataLayout &DL = II->getDataLayout();
+
+ auto *HandleType = cast<TargetExtType>(II->getOperand(0)->getType());
+ assert(HandleType->getName() == "dx.TypedBuffer" &&
+ "Unexpected typed buffer type");
+ Type *ContainedType = HandleType->getTypeParameter(0);
+ Type *ScalarType = ContainedType->getScalarType();
+ uint64_t ScalarSize = DL.getTypeSizeInBits(ScalarType) / 8;
+ int NumElements = ContainedType->getNumContainedTypes();
+ if (!NumElements)
+ NumElements = 1;
+
+ // Process users keeping track of indexing accumulated from GEPs.
+ struct AccessAndIndex {
+ User *Access;
+ Value *Index;
+ };
+ SmallVector<AccessAndIndex> Worklist;
+ for (User *U : II->users())
+ Worklist.push_back({U, nullptr});
+
+ SmallVector<Instruction *> DeadInsts;
+ while (!Worklist.empty()) {
+ AccessAndIndex Current = Worklist.back();
+ Worklist.pop_back();
+
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Current.Access)) {
+ IRBuilder<> Builder(GEP);
+
+ Value *Index;
+ APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
+ APInt Scaled = ConstantOffset.udiv(ScalarSize);
+ Index = ConstantInt::get(Builder.getInt32Ty(), Scaled);
+ } else {
+ auto IndexIt = GEP->idx_begin();
+ assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+ "GEP is not indexing through pointer");
+ ++IndexIt;
+ Index = *IndexIt;
+ assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+ }
+
+ for (User *U : GEP->users())
+ Worklist.push_back({U, Index});
+ DeadInsts.push_back(GEP);
+
+ } else if (auto *SI = dyn_cast<StoreInst>(Current.Access)) {
+ assert(SI->getValueOperand() != II && "Pointer escaped!");
+ IRBuilder<> Builder(SI);
+
+ Value *V = SI->getValueOperand();
+ if (V->getType() == ContainedType) {
+ // V is already the right type.
+ } else if (V->getType() == ScalarType) {
+ // We're storing a scalar, so we need to load the current value and only
+ // replace the relevant part.
+ auto *Load = Builder.CreateIntrinsic(
+ ContainedType, Intrinsic::dx_typedBufferLoad,
+ {II->getOperand(0), II->getOperand(1)});
+ // If we have an offset from seeing a GEP earlier, use it.
+ Value *IndexOp = Current.Index
+ ? Current.Index
+ : ConstantInt::get(Builder.getInt32Ty(), 0);
+ V = Builder.CreateInsertElement(Load, V, IndexOp);
+ } else {
+ llvm_unreachable("Store to typed resource has invalid type");
+ }
+
+ auto *Inst = Builder.CreateIntrinsic(
+ Builder.getVoidTy(), Intrinsic::dx_typedBufferStore,
+ {II->getOperand(0), II->getOperand(1), V});
+ SI->replaceAllUsesWith(Inst);
+ DeadInsts.push_back(SI);
+
+ } else if (auto *LI = dyn_cast<LoadInst>(Current.Access)) {
+ IRBuilder<> Builder(LI);
+ Value *V =
+ Builder.CreateIntrinsic(ContainedType, Intrinsic::dx_typedBufferLoad,
+ {II->getOperand(0), II->getOperand(1)});
+ if (Current.Index)
+ V = Builder.CreateExtractElement(V, Current.Index);
+
+ LI->replaceAllUsesWith(V);
+ DeadInsts.push_back(LI);
+
+ } else
+ llvm_unreachable("Unhandled instruction - pointer escaped?");
+ }
+
+ // Erase the now-dead instructions in reverse discovery order (users first).
+ for (Instruction *Dead : llvm::reverse(DeadInsts))
+ Dead->eraseFromParent();
+ II->eraseFromParent();
+}
+
+static bool transformResourcePointers(Function &F, DXILResourceMap &DRM) {
+ // TODO: Should we have a more efficient way to find resources used in a
+ // particular function?
+ SmallVector<std::pair<IntrinsicInst *, dxil::ResourceInfo &>> Resources;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ auto It = DRM.find(CI);
+ if (It == DRM.end())
+ continue;
+ for (User *U : CI->users())
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
+ if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer)
+ Resources.emplace_back(II, *It);
+ }
+
+ for (const auto &[II, RI] : Resources) {
+ if (RI.isTyped())
+ replaceTypedBufferAccess(II, RI);
+
+ // TODO: handle other resource types. We should probably have an
+ // `unreachable` here once we've added support for all of them.
+ }
+
+ return false;
+}
+
+PreservedAnalyses DXILResourceAccess::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ DXILResourceMap *DRM =
+ MAMProxy.getCachedResult<DXILResourceAnalysis>(*F.getParent());
+ assert(DRM && "DXILResourceAnalysis must be available");
+
+ bool MadeChanges = transformResourcePointers(F, *DRM);
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DXILResourceAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+class DXILResourceAccessLegacy : public FunctionPass {
+public:
+ bool runOnFunction(Function &F) override {
+ DXILResourceMap &DRM =
+ getAnalysis<DXILResourceWrapperPass>().getResourceMap();
+
+ return transformResourcePointers(F, DRM);
+ }
+ StringRef getPassName() const override { return "DXIL Resource Access"; }
+ DXILResourceAccessLegacy() : FunctionPass(ID) {}
+
+ static char ID; // Pass identification.
+ void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+ AU.addRequired<DXILResourceWrapperPass>();
+ AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+char DXILResourceAccessLegacy::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILResourceAccessLegacy, DEBUG_TYPE,
+ "DXIL Resource Access", false, false)
+INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass)
+INITIALIZE_PASS_END(DXILResourceAccessLegacy, DEBUG_TYPE,
+ "DXIL Resource Access", false, false)
+
+FunctionPass *llvm::createDXILResourceAccessLegacyPass() {
+ return new DXILResourceAccessLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.h b/llvm/lib/Target/DirectX/DXILResourceAccess.h
new file mode 100644
index 00000000000000..ac47db21266f64
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.h
@@ -0,0 +1,28 @@
+//===- DXILResourceAccess.h - Resource access via load/store ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file Pass for replacing pointers to DXIL resources with load and store
+// operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_DXILRESOURCEACCESS_H
+#define LLVM_LIB_TARGET_DIRECTX_DXILRESOURCEACCESS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class DXILResourceAccess: public PassInfoMixin<DXILResourceAccess> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_DIRECTX_DXILRESOURCEACCESS_H
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index 3454f16ecd5955..add23587de7d58 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -12,6 +12,7 @@
#define LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
namespace llvm {
+class FunctionPass;
class ModulePass;
class PassRegistry;
class raw_ostream;
@@ -52,6 +53,12 @@ void initializeDXILOpLoweringLegacyPass(PassRegistry &);
/// Pass to lowering LLVM intrinsic call to DXIL op function call.
ModulePass *createDXILOpLoweringLegacyPass();
+/// Initializer for DXILResourceAccess
+void initializeDXILResourceAccessLegacyPass(PassRegistry &);
+
+/// Pass to update resource accesses to use load/store directly.
+FunctionPass *createDXILResourceAccessLegacyPass();
+
/// Initializer for DXILTranslateMetadata.
void initializeDXILTranslateMetadataLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
index a0f864ed39375f..87591b104ce52c 100644
--- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def
+++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
@@ -32,3 +32,9 @@ MODULE_PASS("dxil-translate-metadata", DXILTranslateMetadata())
// TODO: rename to print<foo> after NPM switch
MODULE_PASS("print-dx-shader-flags", dxil::ShaderFlagsAnalysisPrinter(dbgs()))
#undef MODULE_PASS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("dxil-resource-access", DXILResourceAccess())
+#undef FUNCTION_PASS
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index de14c8d9f13e8d..d4e35fb7503160 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "DXILIntrinsicExpansion.h"
#include "DXILOpLowering.h"
#include "DXILPrettyPrinter.h"
+#include "DXILResourceAccess.h"
#include "DXILResourceAnalysis.h"
#include "DXILShaderFlags.h"
#include "DXILTranslateMetadata.h"
@@ -56,6 +57,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
initializeWriteDXILPassPass(*PR);
initializeDXContainerGlobalsPass(*PR);
initializeDXILOpLoweringLegacyPass(*PR);
+ initializeDXILResourceAccessLegacyPass(*PR);
initializeDXILTranslateMetadataLegacyPass(*PR);
initializeDXILResourceMDWrapperPass(*PR);
initializeShaderFlagsAnalysisWrapperPass(*PR);
@@ -92,9 +94,10 @@ class DirectXPassConfig : public TargetPassConfig {
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILIntrinsicExpansionLegacyPass());
addPass(createDXILDataScalarizationLegacyPass());
+ addPass(createDXILFlattenArraysLegacyPass());
+ addPass(createDXILResourceAccessLegacyPass());
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
- addPass(createDXILFlattenArraysLegacyPass());
addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3b701e6ca09761..c231df875bb218 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
@@ -351,6 +352,7 @@ void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<DXILResourceWrapperPass>();
}
char ScalarizerLegacyPass::ID = 0;
@@ -1348,5 +1350,6 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
bool Changed = Impl.visit(F);
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<DXILResourceAnalysis>();
return Changed ? PA : PreservedAnalyses::all();
}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
new file mode 100644
index 00000000000000..2c17ec674632ba
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -dxil-resource-access %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+declare void @use_float4(<4 x float>)
+declare void @use_float(<4 x float>)
+
+; CHECK-LABEL: define void @load_float4
+define void @load_float4(i32 %index, i32 %elemindex) {
+ %buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+
+ ; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+ %vec_data = load <4 x float>, ptr %ptr
+ call void @use_float4(<4 x float> %vec_data)
+
+ ; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: extractelement <4 x float> %[[VALUE]], i32 4
+ %y_ptr = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 4
+ %y_data = load float, ptr %y_ptr
+ call void @use_float(float %y_data)
+
+ ; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: extractelement <4 x float> %[[VALUE]], i32 %elemindex
+ %dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex
+ %dyndata = load float, ptr %dynamic
+ call void @use_float(float %dyndata)
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll
new file mode 100644
index 00000000000000..dd63acc3c0e96c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/store_typedbuffer.ll
@@ -0,0 +1,103 @@
+; RUN: opt -S -dxil-resource-access %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+; CHECK-LABEL: define void @store_float4
+define void @store_float4(<4 x float> %data, i32 %index, i32 %elemindex) {
+ %buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+
+ ; Store the whole value
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %data)
+ store <4 x float> %data, ptr %ptr
+
+ ; Store just the .x component
+ %scalar = extractelement <4 x float> %data, i32 0
+ ; CHECK: %[[LOAD:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[LOAD]], float %scalar, i32 0
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]])
+ store float %scalar, ptr %ptr
+
+ ; Store just the .y component
+ ; CHECK: %[[LOAD:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[LOAD]], float %scalar, i32 1
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]])
+ %y_ptr = getelementptr inbounds i8, ptr %ptr, i32 4
+ store float %scalar, ptr %y_ptr
+
+ ; Store to one of the elements dynamically
+ ; CHECK: %[[LOAD:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <4 x float> %[[LOAD]], float %scalar, i32 %elemindex
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index, <4 x float> %[[INSERT]])
+ %dynamic = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 %elemindex
+ store float %scalar, ptr %dynamic
+
+ ret void
+}
+
+; CHECK-LABEL: define void @store_half4
+define void @store_half4(<4 x half> %data, i32 %index) {
+ %buffer = call target("dx.TypedBuffer", <4 x half>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index)
+
+ ; Store the whole value
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f16_1_0_0t.v4f16(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index, <4 x half> %data)
+ store <4 x half> %data, ptr %ptr
+
+ ; Store just the .x component
+ %scalar = extractelement <4 x half> %data, i32 0
+ ; CHECK: %[[LOAD:.*]] = call <4 x half> @llvm.dx.typedBufferLoad.v4f16.tdx.TypedBuffer_v4f16_1_0_0t(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <4 x half> %[[LOAD]], half %scalar, i32 0
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f16_1_0_0t.v4f16(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index, <4 x half> %[[INSERT]])
+ store half %scalar, ptr %ptr
+
+ ; Store just the .y component
+ ; CHECK: %[[LOAD:.*]] = call <4 x half> @llvm.dx.typedBufferLoad.v4f16.tdx.TypedBuffer_v4f16_1_0_0t(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <4 x half> %[[LOAD]], half %scalar, i32 1
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f16_1_0_0t.v4f16(target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, i32 %index, <4 x half> %[[INSERT]])
+ %y_ptr = getelementptr inbounds i8, ptr %ptr, i32 2
+ store half %scalar, ptr %y_ptr
+
+ ret void
+}
+
+; CHECK-LABEL: define void @store_double2
+define void @store_double2(<2 x double> %data, i32 %index) {
+ %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v2f64_1_0_0(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK-NOT: @llvm.dx.resource.getpointer
+ %ptr = call ptr @llvm.dx.resource.getpointer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index)
+
+ ; Store the whole value
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v2f64_1_0_0t.v2f64(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index, <2 x double> %data)
+ store <2 x double> %data, ptr %ptr
+
+ ; Store just the .x component
+ %scalar = extractelement <2 x double> %data, i32 0
+ ; CHECK: %[[LOAD:.*]] = call <2 x double> @llvm.dx.typedBufferLoad.v2f64.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <2 x double> %[[LOAD]], double %scalar, i32 0
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v2f64_1_0_0t.v2f64(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index, <2 x double> %[[INSERT]])
+ store double %scalar, ptr %ptr
+
+ ; Store just the .y component
+ ; CHECK: %[[LOAD:.*]] = call <2 x double> @llvm.dx.typedBufferLoad.v2f64.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index)
+ ; CHECK: %[[INSERT:.*]] = insertelement <2 x double> %[[LOAD]], double %scalar, i32 1
+ ; CHECK: call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v2f64_1_0_0t.v2f64(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 %index, <2 x double> %[[INSERT]])
+ %y_ptr = getelementptr inbounds i8, ptr %ptr, i32 8
+ store double %scalar, ptr %y_ptr
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 147898efc716fd..ed484f288d9d09 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -12,7 +12,9 @@
; CHECK-NEXT: DXIL Intrinsic Expansion
; CHECK-NEXT: DXIL Data Scalarization
; CHECK-NEXT: DXIL Array Flattener
+; CHECK-NEXT: DXIL Resource analysis
; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: DXIL Resource Access
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Scalarize vector operations
; CHECK-NEXT: DXIL Resource Binding Analysis
>From 34d952b9cb6875e4dbe0a543c2c4857a59d34e79 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 20 Nov 2024 09:38:19 -0800
Subject: [PATCH 2/6] clang-format
---
llvm/lib/Target/DirectX/DXILResourceAccess.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.h b/llvm/lib/Target/DirectX/DXILResourceAccess.h
index ac47db21266f64..9e17b57c625789 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.h
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.h
@@ -18,7 +18,7 @@
namespace llvm {
-class DXILResourceAccess: public PassInfoMixin<DXILResourceAccess> {
+class DXILResourceAccess : public PassInfoMixin<DXILResourceAccess> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
>From 52fa4f22357ac1ab3c59d04ef3dd4e0c4606fdcb Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Sun, 3 Nov 2024 14:28:39 -0800
Subject: [PATCH 3/6] fix indexing of "y" element
---
llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
index 2c17ec674632ba..6e4d585488c5f7 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
@@ -20,8 +20,8 @@ define void @load_float4(i32 %index, i32 %elemindex) {
call void @use_float4(<4 x float> %vec_data)
; CHECK: %[[VALUE:.*]] = call <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, i32 %index)
- ; CHECK: extractelement <4 x float> %[[VALUE]], i32 4
- %y_ptr = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 4
+ ; CHECK: extractelement <4 x float> %[[VALUE]], i32 1
+ %y_ptr = getelementptr inbounds <4 x float>, ptr %ptr, i32 0, i32 1
%y_data = load float, ptr %y_ptr
call void @use_float(float %y_data)
>From 8222cce726932a90167e6bc65da3480059599eda Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 26 Nov 2024 15:17:44 -0800
Subject: [PATCH 4/6] fixup: Add comment and fix test function signature
---
llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 6 +++---
.../test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index f9b28800b74909..6e734a1ec2b25a 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -29,11 +29,11 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
assert(HandleType->getName() == "dx.TypedBuffer" &&
"Unexpected typed buffer type");
Type *ContainedType = HandleType->getTypeParameter(0);
+
+ // We need the size of an element in bytes so that we can calculate the offset
+ // in elements given a total offset in bytes later.
Type *ScalarType = ContainedType->getScalarType();
uint64_t ScalarSize = DL.getTypeSizeInBits(ScalarType) / 8;
- int NumElements = ContainedType->getNumContainedTypes();
- if (!NumElements)
- NumElements = 1;
// Process users keeping track of indexing accumulated from GEPs.
struct AccessAndIndex {
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
index 6e4d585488c5f7..a40e6cbc6fa4ff 100644
--- a/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load_typedbuffer.ll
@@ -3,7 +3,7 @@
target triple = "dxil-pc-shadermodel6.6-compute"
declare void @use_float4(<4 x float>)
-declare void @use_float(<4 x float>)
+declare void @use_float(float)
; CHECK-LABEL: define void @load_float4
define void @load_float4(i32 %index, i32 %elemindex) {
>From 9769f77797e920e25a1f50bd9685471f6a4479c9 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Sun, 3 Nov 2024 14:28:39 -0800
Subject: [PATCH 5/6] fixup: adapt to changes in #119773
---
.../lib/Target/DirectX/DXILResourceAccess.cpp | 54 +++++++++----------
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 3 --
llvm/test/CodeGen/DirectX/llc-pipeline.ll | 2 +-
3 files changed, 26 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 6e734a1ec2b25a..7e9f9e1593e967 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -22,7 +22,7 @@
using namespace llvm;
static void replaceTypedBufferAccess(IntrinsicInst *II,
- dxil::ResourceInfo &RI) {
+ dxil::ResourceTypeInfo &RTI) {
const DataLayout &DL = II->getDataLayout();
auto *HandleType = cast<TargetExtType>(II->getOperand(0)->getType());
@@ -119,46 +119,43 @@ static void replaceTypedBufferAccess(IntrinsicInst *II,
II->eraseFromParent();
}
-static bool transformResourcePointers(Function &F, DXILResourceMap &DRM) {
- // TODO: Should we have a more efficient way to find resources used in a
- // particular function?
- SmallVector<std::pair<IntrinsicInst *, dxil::ResourceInfo &>> Resources;
+static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) {
+ bool Changed = false;
+ SmallVector<std::pair<IntrinsicInst *, dxil::ResourceTypeInfo>> Resources;
for (BasicBlock &BB : F)
for (Instruction &I : BB)
- if (auto *CI = dyn_cast<CallInst>(&I)) {
- auto It = DRM.find(CI);
- if (It == DRM.end())
- continue;
- for (User *U : CI->users())
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
- if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer)
- Resources.emplace_back(II, *It);
- }
-
- for (const auto &[II, RI] : Resources) {
- if (RI.isTyped())
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
+ auto *HandleTy = cast<TargetExtType>(II->getArgOperand(0)->getType());
+ Resources.emplace_back(II, DRTM[HandleTy]);
+ }
+
+ for (auto &[II, RI] : Resources) {
+ if (RI.isTyped()) {
+ Changed = true;
replaceTypedBufferAccess(II, RI);
+ }
// TODO: handle other resource types. We should probably have an
// `unreachable` here once we've added support for all of them.
}
- return false;
+ return Changed;
}
PreservedAnalyses DXILResourceAccess::run(Function &F,
FunctionAnalysisManager &FAM) {
auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
- DXILResourceMap *DRM =
- MAMProxy.getCachedResult<DXILResourceAnalysis>(*F.getParent());
- assert(DRM && "DXILResourceAnalysis must be available");
+ DXILResourceTypeMap *DRTM =
+ MAMProxy.getCachedResult<DXILResourceTypeAnalysis>(*F.getParent());
+ assert(DRTM && "DXILResourceTypeAnalysis must be available");
- bool MadeChanges = transformResourcePointers(F, *DRM);
+ bool MadeChanges = transformResourcePointers(F, *DRTM);
if (!MadeChanges)
return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserve<DXILResourceAnalysis>();
+ PA.preserve<DXILResourceTypeAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
@@ -167,18 +164,17 @@ namespace {
class DXILResourceAccessLegacy : public FunctionPass {
public:
bool runOnFunction(Function &F) override {
- DXILResourceMap &DRM =
- getAnalysis<DXILResourceWrapperPass>().getResourceMap();
+ DXILResourceTypeMap &DRTM =
+ getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
- return transformResourcePointers(F, DRM);
+ return transformResourcePointers(F, DRTM);
}
StringRef getPassName() const override { return "DXIL Resource Access"; }
DXILResourceAccessLegacy() : FunctionPass(ID) {}
static char ID; // Pass identification.
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
- AU.addRequired<DXILResourceWrapperPass>();
- AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addRequired<DXILResourceTypeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
};
@@ -187,7 +183,7 @@ char DXILResourceAccessLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(DXILResourceAccessLegacy, DEBUG_TYPE,
"DXIL Resource Access", false, false)
-INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass)
INITIALIZE_PASS_END(DXILResourceAccessLegacy, DEBUG_TYPE,
"DXIL Resource Access", false, false)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index c231df875bb218..3b701e6ca09761 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/DXILResource.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
@@ -352,7 +351,6 @@ void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<DXILResourceWrapperPass>();
}
char ScalarizerLegacyPass::ID = 0;
@@ -1350,6 +1348,5 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
bool Changed = Impl.visit(F);
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<DXILResourceAnalysis>();
return Changed ? PA : PreservedAnalyses::all();
}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index ed484f288d9d09..65771356ed5e63 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -5,8 +5,8 @@
; CHECK-LABEL: Pass Arguments:
; CHECK-NEXT: Target Library Information
-; CHECK-NEXT: Target Transform Information
; CHECK-NEXT: DXIL Resource Type Analysis
+; CHECK-NEXT: Target Transform Information
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: DXIL Finalize Linkage
; CHECK-NEXT: DXIL Intrinsic Expansion
>From 958f7ec85a3d8ac41c058a90c144962a85f596b4 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Thu, 12 Dec 2024 14:46:08 -0800
Subject: [PATCH 6/6] fixup: Clean up leftover declarations
---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index c66b24442d4bd0..c48e716e78ccb6 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -553,6 +553,14 @@ class OpLowerer {
});
}
+ [[nodiscard]] bool lowerGetPointer(Function &F) {
+ // These should have already been handled in DXILResourceAccess, so we can
+ // just clean up the dead prototype.
+ assert(F.user_empty() && "getpointer operations should have been removed");
+ F.eraseFromParent();
+ return false;
+ }
+
[[nodiscard]] bool lowerTypedBufferStore(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
@@ -706,6 +714,9 @@ class OpLowerer {
case Intrinsic::dx_handle_fromBinding:
HasErrors |= lowerHandleFromBinding(F);
break;
+ case Intrinsic::dx_resource_getpointer:
+ HasErrors |= lowerGetPointer(F);
+ break;
case Intrinsic::dx_typedBufferLoad:
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false);
break;
More information about the llvm-branch-commits
mailing list