[llvm] [DirectX] Add support for typedBufferLoad and Store for RWBuffer<double2> and RWBuffer<double> (PR #139996)
Sarah Spall via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 11:36:15 PDT 2025
https://github.com/spall updated https://github.com/llvm/llvm-project/pull/139996
From 3dfe5dc44c5b555e3b516408501d908c1d3a4b9d Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Mon, 12 May 2025 08:24:00 -0700
Subject: [PATCH 1/3] expand buffer load and store + tests
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 91 +++++++++++++++++++
llvm/test/CodeGen/DirectX/BufferLoad.ll | 32 +++++++
llvm/test/CodeGen/DirectX/BufferLoadDouble.ll | 58 ++++++++++++
llvm/test/CodeGen/DirectX/BufferStore.ll | 41 +++++++++
.../test/CodeGen/DirectX/BufferStoreDouble.ll | 47 ++++++++++
5 files changed, 269 insertions(+)
create mode 100644 llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
create mode 100644 llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index cff8d637dcb87..bfa41b36166aa 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -70,6 +70,15 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
+ case Intrinsic::dx_resource_load_typedbuffer: // want to transform double and
+ // double2
+ return F.getReturnType()
+ ->getStructElementType(0)
+ ->getScalarType()
+ ->isDoubleTy();
+ case Intrinsic::dx_resource_store_typedbuffer: // want to transform double and
+ // double2
+ return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
}
return false;
}
@@ -532,6 +541,80 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, PiOver180);
}
+static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+ IRBuilder<> Builder(Orig);
+
+ unsigned ExtractNum =
+ Orig->getType()->getStructElementType(0)->isVectorTy() ? 4 : 2;
+ Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
+
+ Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
+ auto *X =
+ Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
+ {Orig->getOperand(0), Orig->getOperand(1)});
+
+ // create new extract value
+ Value *Extract = Builder.CreateExtractValue(X, {0});
+
+ SmallVector<Value *> ExtractElements;
+ for (unsigned I = 0; I < ExtractNum; ++I)
+ ExtractElements.push_back(
+ Builder.CreateExtractElement(Extract, (uint64_t)I));
+
+ // combine into double(s)
+ Value *Result =
+ PoisonValue::get(VectorType::get(Builder.getDoubleTy(), 2, false));
+ for (unsigned I = 0; I < ExtractNum; I += 2) {
+ Value *Dbl =
+ Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+ {ExtractElements[I], ExtractElements[I + 1]});
+ if (ExtractNum == 4)
+ Result = Builder.CreateInsertElement(Result, Dbl, (uint64_t)I / 2);
+ else
+ Result = Dbl;
+ }
+
+ assert(Orig->hasOneUser() && "TypedBufferLoad is expected to have one user");
+ auto *U = Orig->user_back();
+ auto *OldExtract = dyn_cast<ExtractValueInst>(U);
+ if (!OldExtract)
+ llvm_unreachable("TypedBufferLoad's only users should be ExtractValueInst");
+ OldExtract->replaceAllUsesWith(Result);
+ OldExtract->eraseFromParent();
+}
+
+void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+ IRBuilder<> Builder(Orig);
+
+ unsigned ExtractNum =
+ Orig->getFunctionType()->getParamType(2)->isVectorTy() ? 4 : 2;
+ Type *SplitElementTy = Builder.getInt32Ty();
+ SmallVector<int> Mask = {0, 1};
+ if (ExtractNum == 4) {
+ SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+ Mask = {0, 2, 1, 3};
+ }
+
+ // split our double(s)
+ auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+ Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+ Orig->getOperand(2));
+ // create our vector
+ Value *LowBits = Builder.CreateExtractValue(Split, 0);
+ Value *HighBits = Builder.CreateExtractValue(Split, 1);
+ Value *Val;
+ if (ExtractNum == 2) {
+ Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
+ Val = Builder.CreateInsertElement(Val, LowBits, (uint64_t)0);
+ Val = Builder.CreateInsertElement(Val, HighBits, 1);
+ } else
+ Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+
+ Builder.CreateIntrinsic(Builder.getVoidTy(),
+ Intrinsic::dx_resource_store_typedbuffer,
+ {Orig->getOperand(0), Orig->getOperand(1), Val});
+}
+
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umax;
@@ -660,6 +743,14 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_radians:
Result = expandRadiansIntrinsic(Orig);
break;
+ case Intrinsic::dx_resource_load_typedbuffer:
+ expandTypedBufferLoadIntrinsic(Orig);
+ Orig->eraseFromParent();
+ return true;
+ case Intrinsic::dx_resource_store_typedbuffer:
+ expandTypedBufferStoreIntrinsic(Orig);
+ Orig->eraseFromParent();
+ return true;
case Intrinsic::usub_sat:
Result = expandUsubSat(Orig);
break;
diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll
index 6d5146a9026ce..96bfbb8db95ce 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoad.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll
@@ -197,4 +197,36 @@ define void @loadv4i16() {
ret void
}
+define void @loadf64() {
+ ; show dxil op lower can handle typedbuffer load where target is double but load type is <2 x i32>
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+ %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 266 }) #0
+ %load = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+ %val = extractvalue { <2 x i32>, i1 } %load, 0
+ ret void
+}
+
+define void @loadv2f64() {
+ ; show dxil op lower can handle typedbuffer load where target is double2 but load type is <4 x i32>
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+ %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 522 }) #0
+ %load = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+ ; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+ %val = extractvalue { <4 x i32>, i1 } %load, 0
+ ret void
+}
+
; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
new file mode 100644
index 0000000000000..53ed74b9868e4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @loadf64() {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+ %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; check we load an <2 x i32> instead of a double
+ ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
+ %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+ ; check we extract the two i32 and construct a double
+ ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+ ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
+ ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
+ ; CHECK: call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+ %data0 = extractvalue {double, i1} %load0, 0
+ ret void
+}
+
+define void @loadv2f64() {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+ %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ; check we load an <4 x i32> instead of a double2
+ ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(
+ ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0)
+ %load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+ ; check we extract the 4 i32 and construct a <2 x double>
+ ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
+ ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i64 0
+ ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i64 1
+ ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i64 2
+ ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i64 3
+ ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
+ ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
+ ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
+ ; CHECK: insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+ %data0 = extractvalue { <2 x double>, i1 } %load0, 0
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
index 363a3c723bfd5..e21047c9296d1 100644
--- a/llvm/test/CodeGen/DirectX/BufferStore.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -161,3 +161,44 @@ define void @store_scalarized_floats(float %data0, float %data1, float %data2, f
ret void
}
+
+define void @storef64(<2 x i32> %0) {
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+
+ %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.resource.casthandle
+
+ ; CHECK: [[D0:%.*]] = extractelement <2 x i32> %0, i32 0
+ ; CHECK: [[D1:%.*]] = extractelement <2 x i32> %0, i32 1
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 %2, i32 %3, i32 %2, i32 %2, i8 15)
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, <2 x i32> %0)
+ ret void
+}
+
+define void @storev2f64(<4 x i32> %0) {
+ ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+ ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+
+ %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; The temporary casts should all have been cleaned up
+ ; CHECK-NOT: %dx.resource.casthandle
+
+ ; CHECK: [[D0:%.*]] = extractelement <4 x i32> %0, i32 0
+ ; CHECK: [[D1:%.*]] = extractelement <4 x i32> %0, i32 1
+ ; CHECK: [[D2:%.*]] = extractelement <4 x i32> %0, i32 2
+ ; CHECK: [[D3:%.*]] = extractelement <4 x i32> %0, i32 3
+ ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D2]], i32 [[D3]], i8 15)
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+ <4 x i32> %0)
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
new file mode 100644
index 0000000000000..bb4dbb5efb593
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storef64(double %0) {
+ ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
+ %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; check we split the double and store the lo and hi bits
+ ; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
+ ; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
+ ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
+ ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i64 0
+ ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i64 1
+ ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
+ ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
+ double %0)
+ ret void
+}
+
+
+define void @storev2f64(<2 x double> %0) {
+ ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
+ %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+ i32 0, i32 0, i32 1, i32 0, i1 false)
+
+ ; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> }
+ ; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0)
+ ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
+ ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
+ ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
+ ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
+ call void @llvm.dx.resource.store.typedbuffer(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+ <2 x double> %0)
+ ret void
+}
From 39d443dc82a10199cfc23d38de5dc1b9f4e44e1b Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Thu, 15 May 2025 08:51:42 -0700
Subject: [PATCH 2/3] respond to pr feedback
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 69 ++++++++++---------
llvm/test/CodeGen/DirectX/BufferLoadDouble.ll | 53 +++++++++++++-
2 files changed, 89 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index bfa41b36166aa..d796efeb004bb 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -541,20 +541,27 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, PiOver180);
}
-static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+static Value *expandTypedBufferLoadIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
- unsigned ExtractNum =
- Orig->getType()->getStructElementType(0)->isVectorTy() ? 4 : 2;
+ Type *BufferTy = Orig->getType()->getStructElementType(0);
+
+ unsigned ExtractNum = 2;
+ if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+ assert(VT->getNumElements() == 2 &&
+ "TypedBufferLoad double vector has wrong size");
+ ExtractNum = 4;
+ }
+
Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
- auto *X =
+ CallInst *Load =
Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
{Orig->getOperand(0), Orig->getOperand(1)});
- // create new extract value
- Value *Extract = Builder.CreateExtractValue(X, {0});
+ // extract the buffer load's result
+ Value *Extract = Builder.CreateExtractValue(Load, {0});
SmallVector<Value *> ExtractElements;
for (unsigned I = 0; I < ExtractNum; ++I)
@@ -562,8 +569,7 @@ static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
Builder.CreateExtractElement(Extract, (uint64_t)I));
// combine into double(s)
- Value *Result =
- PoisonValue::get(VectorType::get(Builder.getDoubleTy(), 2, false));
+ Value *Result = PoisonValue::get(BufferTy);
for (unsigned I = 0; I < ExtractNum; I += 2) {
Value *Dbl =
Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
@@ -574,26 +580,29 @@ static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
Result = Dbl;
}
- assert(Orig->hasOneUser() && "TypedBufferLoad is expected to have one user");
- auto *U = Orig->user_back();
- auto *OldExtract = dyn_cast<ExtractValueInst>(U);
- if (!OldExtract)
- llvm_unreachable("TypedBufferLoad's only users should be ExtractValueInst");
- OldExtract->replaceAllUsesWith(Result);
- OldExtract->eraseFromParent();
+ Value *CheckBit = Builder.CreateExtractValue(Load, {1});
+
+ Value *Struct = PoisonValue::get(Orig->getType());
+ Struct = Builder.CreateInsertValue(Struct, Result, {0});
+ Struct = Builder.CreateInsertValue(Struct, CheckBit, {1});
+ return Struct;
}
-void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+static Value *expandTypedBufferStoreIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
- unsigned ExtractNum =
- Orig->getFunctionType()->getParamType(2)->isVectorTy() ? 4 : 2;
+ Type *BufferTy = Orig->getFunctionType()->getParamType(2);
+
+ unsigned ExtractNum = 2;
+ if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+ assert(VT->getNumElements() == 2 &&
+ "TypedBufferStore double vector has wrong size");
+ ExtractNum = 4;
+ }
+
Type *SplitElementTy = Builder.getInt32Ty();
- SmallVector<int> Mask = {0, 1};
- if (ExtractNum == 4) {
+ if (ExtractNum == 4)
SplitElementTy = VectorType::get(SplitElementTy, 2, false);
- Mask = {0, 2, 1, 3};
- }
// split our double(s)
auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
@@ -610,9 +619,9 @@ void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
} else
Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
- Builder.CreateIntrinsic(Builder.getVoidTy(),
- Intrinsic::dx_resource_store_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1), Val});
+ return Builder.CreateIntrinsic(
+ Builder.getVoidTy(), Intrinsic::dx_resource_store_typedbuffer,
+ {Orig->getOperand(0), Orig->getOperand(1), Val});
}
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
@@ -744,13 +753,11 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
Result = expandRadiansIntrinsic(Orig);
break;
case Intrinsic::dx_resource_load_typedbuffer:
- expandTypedBufferLoadIntrinsic(Orig);
- Orig->eraseFromParent();
- return true;
+ Result = expandTypedBufferLoadIntrinsic(Orig);
+ break;
case Intrinsic::dx_resource_store_typedbuffer:
- expandTypedBufferStoreIntrinsic(Orig);
- Orig->eraseFromParent();
- return true;
+ Result = expandTypedBufferStoreIntrinsic(Orig);
+ break;
case Intrinsic::usub_sat:
Result = expandUsubSat(Orig);
break;
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
index 53ed74b9868e4..5e9f3979e9ef3 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -11,7 +11,10 @@ define void @loadf64() {
@llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
i32 0, i32 1, i32 1, i32 0, i1 false)
+ ret void
+
; check we load an <2 x i32> instead of a double
+ ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
@@ -22,7 +25,12 @@ define void @loadf64() {
; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
- ; CHECK: call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+ ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+ ; construct a new {double, i1}
+ ; CHECK: [[CB:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 1
+ ; CHECK: [[S1:%.*]] = insertvalue { double, i1 } poison, double [[DBL]], 0
+ ; CHECK: [[S2:%.*]] = insertvalue { double, i1 } [[S1]], i1 [[CB]], 1
+ ; CHECK: extractvalue { double, i1 } [[S2]], 0
%data0 = extractvalue {double, i1} %load0, 0
ret void
}
@@ -52,7 +60,48 @@ define void @loadv2f64() {
; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
- ; CHECK: insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+ ; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+ ; construct a new {<2 x double>, i1}
+ ; CHECK: [[CB:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1
+ ; CHECK: [[S1:%.*]] = insertvalue { <2 x double>, i1 } poison, <2 x double> [[Vec2]], 0
+ ; CHECK: [[S2:%.*]] = insertvalue { <2 x double>, i1 } [[S1]], i1 [[CB]], 1
+ ; CHECK: extractvalue { <2 x double>, i1 } [[S2]], 0
%data0 = extractvalue { <2 x double>, i1 } %load0, 0
ret void
}
+
+; show we properly handle extracting the check bit
+define void @loadf64WithCheckBit() {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+ %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false)
+
+ ret void
+
+ ; check we load an <2 x i32> instead of a double
+ ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
+ ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+ ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
+ %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+ ; check we extract the two i32 and construct a double
+ ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+ ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
+ ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
+ ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+ ; construct a new {double, i1}
+ ; CHECK: [[CB:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 1
+ ; CHECK: [[S1:%.*]] = insertvalue { double, i1 } poison, double [[DBL]], 0
+ ; CHECK: [[S2:%.*]] = insertvalue { double, i1 } [[S1]], i1 [[CB]], 1
+ ; CHECK: extractvalue { double, i1 } [[S2]], 0
+ %data0 = extractvalue {double, i1} %load0, 0
+ ; CHECK: extractvalue { double, i1 } [[S2]], 1
+ %cb = extractvalue {double, i1} %load0, 1
+ ret void
+}
\ No newline at end of file
From 730bc337d89f40cf194c80acdd8589a5f5607505 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Thu, 15 May 2025 11:36:00 -0700
Subject: [PATCH 3/3] update comments
---
llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index d796efeb004bb..cbec0b31bca3c 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -70,14 +70,14 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
- case Intrinsic::dx_resource_load_typedbuffer: // want to transform double and
- // double2
+ case Intrinsic::dx_resource_load_typedbuffer:
+ // We need to handle doubles and vector of doubles.
return F.getReturnType()
->getStructElementType(0)
->getScalarType()
->isDoubleTy();
- case Intrinsic::dx_resource_store_typedbuffer: // want to transform double and
- // double2
+ case Intrinsic::dx_resource_store_typedbuffer:
+ // We need to handle doubles and vector of doubles.
return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
}
return false;
More information about the llvm-commits mailing list