[llvm] [DirectX] Implement `memcpy` in DXIL CBuffer Access pass (PR #144436)
Deric C. via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 14:57:09 PDT 2025
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/144436
>From f1988f43da9b901d40ce6cee8c34ae6bbd8b47af Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 2 Jun 2025 22:53:53 +0000
Subject: [PATCH 1/2] Expect only Load and GEP users of Globals, and report
error otherwise
---
llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index 7559f61b4cfb9..cfd7cb95ca8f0 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -11,9 +11,12 @@
#include "llvm/Frontend/HLSL/CBuffer.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "dxil-cbuffer-access"
@@ -161,7 +164,12 @@ static void replaceAccessesWithHandle(GlobalVariable *Global,
}
// Otherwise, walk users looking for a load...
- ToProcess.append(Cur->user_begin(), Cur->user_end());
+ if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
+ ToProcess.append(Cur->user_begin(), Cur->user_end());
+ continue;
+ }
+
+ reportFatalInternalError("Unexpected user of Global");
}
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
>From 19102b135bd1228c9275eb8e3821d53bade5b240 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Fri, 13 Jun 2025 18:07:19 +0000
Subject: [PATCH 2/2] Implement memcpy support in DXIL CBufferAccess pass
---
llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 293 +++++++++++++-----
.../CodeGen/DirectX/CBufferAccess/memcpy.ll | 204 ++++++++++++
2 files changed, 415 insertions(+), 82 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index cfd7cb95ca8f0..e4a0ad0dd6159 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -17,6 +17,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "dxil-cbuffer-access"
@@ -57,109 +58,236 @@ struct CBufferRowIntrin {
}
}
};
-} // namespace
-static size_t getOffsetForCBufferGEP(GEPOperator *GEP, GlobalVariable *Global,
- const DataLayout &DL) {
- // Since we should always have a constant offset, we should only ever have a
- // single GEP of indirection from the Global.
- assert(GEP->getPointerOperand() == Global &&
- "Indirect access to resource handle");
+// Helper for creating CBuffer handles and loading data from them
+struct CBufferResource {
+ GlobalVariable *GVHandle;
+ GlobalVariable *Member;
+ size_t MemberOffset;
- APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
- bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
- (void)Success;
- assert(Success && "Offsets into cbuffer globals must be constant");
+ LoadInst *Handle;
- if (auto *ATy = dyn_cast<ArrayType>(Global->getValueType()))
- ConstantOffset = hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
+ CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member,
+ size_t MemberOffset)
+ : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
- return ConstantOffset.getZExtValue();
-}
+ const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); }
+ Type *getValueType() { return Member->getValueType(); }
+ iterator_range<ConstantDataSequential::user_iterator> users() {
+ return Member->users();
+ }
-/// Replace access via cbuffer global with a load from the cbuffer handle
-/// itself.
-static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
- GlobalVariable *HandleGV, size_t BaseOffset,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- const DataLayout &DL = HandleGV->getDataLayout();
+ /// Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
+ /// `Val` can either be Member itself, or a GEP of a constant offset from
+ /// Member
+ size_t getOffsetForCBufferGEP(Value *Val) {
+ assert(isa<PointerType>(Val->getType()) &&
+ "Expected a pointer-typed value");
+
+ if (Val == Member)
+ return 0;
+
+ if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
+ // Since we should always have a constant offset, we should only ever have
+ // a single GEP of indirection from the Global.
+ assert(GEP->getPointerOperand() == Member &&
+ "Indirect access to resource handle");
+
+ const DataLayout &DL = getDataLayout();
+ APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
+ (void)Success;
+ assert(Success && "Offsets into cbuffer globals must be constant");
+
+ if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
+ ConstantOffset =
+ hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
+
+ return ConstantOffset.getZExtValue();
+ }
- size_t Offset = BaseOffset;
- if (auto *GEP = dyn_cast<GEPOperator>(LI->getPointerOperand()))
- Offset += getOffsetForCBufferGEP(GEP, Global, DL);
- else if (LI->getPointerOperand() != Global)
- llvm_unreachable("Load instruction doesn't reference cbuffer global");
+ llvm_unreachable("Expected Val to be a GlobalVariable or GEP");
+ }
- IRBuilder<> Builder(LI);
- auto *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV,
- HandleGV->getName());
-
- Type *Ty = LI->getType();
- CBufferRowIntrin Intrin(DL, Ty->getScalarType());
- // The cbuffer consists of some number of 16-byte rows.
- unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes;
- unsigned int CurrentIndex =
- (Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
-
- auto *CBufLoad = Builder.CreateIntrinsic(
- Intrin.RetTy, Intrin.IID,
- {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
- LI->getName());
- auto *Elt =
- Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, LI->getName());
-
- Value *Result = nullptr;
- unsigned int Remaining =
- ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
- if (Remaining == 0) {
- // We only have a single element, so we're done.
- Result = Elt;
-
- // However, if we loaded a <1 x T>, then we need to adjust the type here.
- if (auto *VT = dyn_cast<FixedVectorType>(LI->getType())) {
- assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
- Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
- Builder.getInt32(0));
- }
- } else {
- // Walk each element and extract it, wrapping to new rows as needed.
- SmallVector<Value *> Extracts{Elt};
- while (Remaining--) {
- CurrentIndex %= Intrin.NumElts;
-
- if (CurrentIndex == 0)
- CBufLoad = Builder.CreateIntrinsic(
- Intrin.RetTy, Intrin.IID,
- {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
- nullptr, LI->getName());
-
- Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
- LI->getName()));
+ /// Create a handle for this cbuffer resource using the IRBuilder `Builder`
+ /// and sets the handle as the current one to use for subsequent calls to
+ /// `loadValue`
+ void createAndSetCurrentHandle(IRBuilder<> &Builder) {
+ Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle,
+ GVHandle->getName());
+ }
+
+ /// Load a value of type `Ty` at offset `Offset` using the handle from the
+ /// last call to `createAndSetCurrentHandle`
+ Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset,
+ const Twine &Name = "") {
+ assert(Handle &&
+ "Expected a handle for this cbuffer global resource to be created "
+ "before loading a value from it");
+ const DataLayout &DL = getDataLayout();
+
+ size_t TargetOffset = MemberOffset + Offset;
+ CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+ // The cbuffer consists of some number of 16-byte rows.
+ unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
+ unsigned int CurrentIndex =
+ (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+ auto *CBufLoad = Builder.CreateIntrinsic(
+ Intrin.RetTy, Intrin.IID,
+ {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
+ Name + ".load");
+ auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+ Name + ".extract");
+
+ Value *Result = nullptr;
+ unsigned int Remaining =
+ ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+ if (Remaining == 0) {
+ // We only have a single element, so we're done.
+ Result = Elt;
+
+ // However, if we loaded a <1 x T>, then we need to adjust the type here.
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ assert(VT->getNumElements() == 1 &&
+ "Can't have multiple elements here");
+ Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+ Builder.getInt32(0), Name);
+ }
+ } else {
+ // Walk each element and extract it, wrapping to new rows as needed.
+ SmallVector<Value *> Extracts{Elt};
+ while (Remaining--) {
+ CurrentIndex %= Intrin.NumElts;
+
+ if (CurrentIndex == 0)
+ CBufLoad = Builder.CreateIntrinsic(
+ Intrin.RetTy, Intrin.IID,
+ {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
+ nullptr, Name + ".load");
+
+ Extracts.push_back(Builder.CreateExtractValue(
+ CBufLoad, {CurrentIndex++}, Name + ".extract"));
+ }
+
+ // Finally, we build up the original loaded value.
+ Result = PoisonValue::get(Ty);
+ for (int I = 0, E = Extracts.size(); I < E; ++I)
+ Result = Builder.CreateInsertElement(Result, Extracts[I],
+ Builder.getInt32(I),
+ Name + formatv(".upto{}", I));
}
- // Finally, we build up the original loaded value.
- Result = PoisonValue::get(Ty);
- for (int I = 0, E = Extracts.size(); I < E; ++I)
- Result =
- Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I));
+ return Result;
}
+};
+} // namespace
+
+/// Replace load via cbuffer global with a load from the cbuffer handle itself.
+static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand());
+ IRBuilder<> Builder(LI);
+ CBR.createAndSetCurrentHandle(Builder);
+ Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName());
LI->replaceAllUsesWith(Result);
DeadInsts.push_back(LI);
}
-static void replaceAccessesWithHandle(GlobalVariable *Global,
- GlobalVariable *HandleGV,
- size_t BaseOffset) {
+/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
+/// itself. Assumes the cbuffer global is an array, and the length of bytes to
+/// copy is divisible by array element allocation size.
+/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
+static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+
+ ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
+ assert(ArrTy && "MemCpy lowering is only supported for array types");
+
+ // This assumption vastly simplifies the implementation
+ if (MCI->getSource() != CBR.Member)
+ reportFatalUsageError(
+ "Expected MemCpy source to be a cbuffer global variable");
+
+ const std::string Name = ("memcpy." + MCI->getDest()->getName() + "." +
+ MCI->getSource()->getName())
+ .str();
+
+ ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
+ uint64_t ByteLength = Length->getZExtValue();
+
+ // If length to copy is zero, no memcpy is needed
+ if (ByteLength == 0) {
+ DeadInsts.push_back(MCI);
+ return;
+ }
+
+ const DataLayout &DL = CBR.getDataLayout();
+
+ Type *ElemTy = ArrTy->getElementType();
+ size_t ElemSize = DL.getTypeAllocSize(ElemTy);
+ assert(ByteLength % ElemSize == 0 &&
+ "Length of bytes to MemCpy must be divisible by allocation size of "
+ "source/destination array elements");
+ size_t ElemsToCpy = ByteLength / ElemSize;
+
+ IRBuilder<> Builder(MCI);
+ CBR.createAndSetCurrentHandle(Builder);
+
+ auto CopyElemsImpl = [&Builder, &MCI, &Name, &CBR,
+ &DL](const auto &Self, ArrayType *ArrTy,
+ size_t ArrOffset, size_t N) -> void {
+ Type *ElemTy = ArrTy->getElementType();
+ size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
+ for (unsigned I = 0; I < N; ++I) {
+ size_t Offset = ArrOffset + I * ElemTySize;
+
+ // Recursively copy nested arrays
+ if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
+ Self(Self, ElemArrTy, Offset, ElemArrTy->getNumElements());
+ continue;
+ }
+
+ // Load CBuffer value and store it in Dest
+ APInt CBufArrayOffset(
+ DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
+ CBufArrayOffset =
+ hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
+ Value *CBufferVal =
+ CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
+ Value *GEP =
+ Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
+ {Builder.getInt32(Offset)}, Name + ".dest");
+ Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
+ }
+ };
+ auto CopyElems = [&CopyElemsImpl](ArrayType *ArrTy, size_t N) -> void {
+ CopyElemsImpl(CopyElemsImpl, ArrTy, 0, N);
+ };
+
+ CopyElems(ArrTy, ElemsToCpy);
+
+ MCI->eraseFromParent();
+}
+
+static void replaceAccessesWithHandle(CBufferResource &CBR) {
SmallVector<WeakTrackingVH> DeadInsts;
- SmallVector<User *> ToProcess{Global->users()};
+ SmallVector<User *> ToProcess{CBR.users()};
while (!ToProcess.empty()) {
User *Cur = ToProcess.pop_back_val();
// If we have a load instruction, replace the access.
if (auto *LI = dyn_cast<LoadInst>(Cur)) {
- replaceAccess(LI, Global, HandleGV, BaseOffset, DeadInsts);
+ replaceLoad(LI, CBR, DeadInsts);
+ continue;
+ }
+
+ // If we have a memcpy instruction, replace it with multiple accesses and
+ // subsequent stores to the destination
+ if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
+ replaceMemCpy(MCI, CBR, DeadInsts);
continue;
}
@@ -181,7 +309,8 @@ static bool replaceCBufferAccesses(Module &M) {
for (const hlsl::CBufferMapping &Mapping : *CBufMD)
for (const hlsl::CBufferMember &Member : Mapping.Members) {
- replaceAccessesWithHandle(Member.GV, Mapping.Handle, Member.Offset);
+ CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset);
+ replaceAccessesWithHandle(CBR);
Member.GV->removeFromParent();
}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
new file mode 100644
index 0000000000000..2cf7327d8c195
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
@@ -0,0 +1,204 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+; float a1[3];
+; double3 a2[2];
+; float16_t a3[2][2];
+; uint64_t a4[3];
+; int2 a5[3][2];
+; uint16_t a6[1];
+; int64_t a7[2];
+; bool a8[4];
+; }
+%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison
+ at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+ at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
+ at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
+ at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
+ at a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16
+ at a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
+ at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
+ at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
+
+; CHECK: define void @f(
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+ %a1.copy = alloca [3 x float], align 4
+ %a2.copy = alloca [2 x <3 x double>], align 32
+ %a3.copy = alloca [2 x [2 x half]], align 2
+ %a4.copy = alloca [3 x i64], align 8
+ %a5.copy = alloca [3 x [2 x <2 x i32>]], align 16
+ %a6.copy = alloca [1 x i16], align 2
+ %a7.copy = alloca [2 x i64], align 8
+ %a8.copy = alloca [4 x i32], align 4
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
+; CHECK: store float [[X]], ptr [[DEST]], align 4
+; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4
+; CHECK: store float [[Y]], ptr [[DEST]], align 4
+; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8
+; CHECK: store float [[Z]], ptr [[DEST]], align 4
+ call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
+; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
+; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
+; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
+; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
+; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
+; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 32
+; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
+ call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 64, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
+; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0
+; CHECK: store half [[X]], ptr [[DEST]], align 2
+; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
+; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2
+; CHECK: store half [[Y]], ptr [[DEST]], align 2
+; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9)
+; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4
+; CHECK: store half [[X]], ptr [[DEST]], align 2
+; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10)
+; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6
+; CHECK: store half [[Y]], ptr [[DEST]], align 2
+ call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11)
+; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0
+; CHECK: store i64 [[X]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
+; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8
+; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13)
+; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16
+; CHECK: store i64 [[Z]], ptr [[DEST]], align 8
+ call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0
+; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8
+; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16
+; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24
+; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32
+; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
+; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40
+; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
+ call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
+; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0
+; CHECK: store i16 [[X]], ptr [[DEST]], align 2
+ call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
+; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0
+; CHECK: store i64 [[X]], ptr [[DEST]], align 8
+; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
+; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8
+; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
+ call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false)
+
+; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20)
+; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0
+; CHECK: store i32 [[X]], ptr [[DEST]], align 4
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21)
+; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4
+; CHECK: store i32 [[Y]], ptr [[DEST]], align 4
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22)
+; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8
+; CHECK: store i32 [[Z]], ptr [[DEST]], align 4
+; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23)
+; CHECK: [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12
+; CHECK: store i32 [[W]], ptr [[DEST]], align 4
+ call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false)
+
+ ret void
+}
+
+declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg)
+
+; CHECK-NOT: !hlsl.cbs =
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
+!1 = !{i32 0, i32 2}
+!2 = !{}
More information about the llvm-commits
mailing list