[llvm] [DirectX] Add support for typedBufferLoad and Store for RWBuffer<double2> and RWBuffer<double> (PR #139996)

Sarah Spall via llvm-commits llvm-commits at lists.llvm.org
Thu May 15 11:36:15 PDT 2025


https://github.com/spall updated https://github.com/llvm/llvm-project/pull/139996

>From 3dfe5dc44c5b555e3b516408501d908c1d3a4b9d Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Mon, 12 May 2025 08:24:00 -0700
Subject: [PATCH 1/3] expand buffer load and store + tests

---
 .../Target/DirectX/DXILIntrinsicExpansion.cpp | 91 +++++++++++++++++++
 llvm/test/CodeGen/DirectX/BufferLoad.ll       | 32 +++++++
 llvm/test/CodeGen/DirectX/BufferLoadDouble.ll | 58 ++++++++++++
 llvm/test/CodeGen/DirectX/BufferStore.ll      | 41 +++++++++
 .../test/CodeGen/DirectX/BufferStoreDouble.ll | 47 ++++++++++
 5 files changed, 269 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
 create mode 100644 llvm/test/CodeGen/DirectX/BufferStoreDouble.ll

diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index cff8d637dcb87..bfa41b36166aa 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -70,6 +70,15 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::vector_reduce_add:
   case Intrinsic::vector_reduce_fadd:
     return true;
+  case Intrinsic::dx_resource_load_typedbuffer: // want to transform double and
+                                                // double2
+    return F.getReturnType()
+        ->getStructElementType(0)
+        ->getScalarType()
+        ->isDoubleTy();
+  case Intrinsic::dx_resource_store_typedbuffer: // want to transform double and
+                                                 // double2
+    return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
   }
   return false;
 }
@@ -532,6 +541,80 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
   return Builder.CreateFMul(X, PiOver180);
 }
 
+static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig);
+
+  unsigned ExtractNum =
+      Orig->getType()->getStructElementType(0)->isVectorTy() ? 4 : 2;
+  Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
+
+  Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
+  auto *X =
+      Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
+                              {Orig->getOperand(0), Orig->getOperand(1)});
+
+  // create new extract value
+  Value *Extract = Builder.CreateExtractValue(X, {0});
+
+  SmallVector<Value *> ExtractElements;
+  for (unsigned I = 0; I < ExtractNum; ++I)
+    ExtractElements.push_back(
+        Builder.CreateExtractElement(Extract, (uint64_t)I));
+
+  // combine into double(s)
+  Value *Result =
+      PoisonValue::get(VectorType::get(Builder.getDoubleTy(), 2, false));
+  for (unsigned I = 0; I < ExtractNum; I += 2) {
+    Value *Dbl =
+        Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+                                {ExtractElements[I], ExtractElements[I + 1]});
+    if (ExtractNum == 4)
+      Result = Builder.CreateInsertElement(Result, Dbl, (uint64_t)I / 2);
+    else
+      Result = Dbl;
+  }
+
+  assert(Orig->hasOneUser() && "TypedBufferLoad is expected to have one user");
+  auto *U = Orig->user_back();
+  auto *OldExtract = dyn_cast<ExtractValueInst>(U);
+  if (!OldExtract)
+    llvm_unreachable("TypedBufferLoad's only users should be ExtractValueInst");
+  OldExtract->replaceAllUsesWith(Result);
+  OldExtract->eraseFromParent();
+}
+
+void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig);
+
+  unsigned ExtractNum =
+      Orig->getFunctionType()->getParamType(2)->isVectorTy() ? 4 : 2;
+  Type *SplitElementTy = Builder.getInt32Ty();
+  SmallVector<int> Mask = {0, 1};
+  if (ExtractNum == 4) {
+    SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+    Mask = {0, 2, 1, 3};
+  }
+
+  // split our double(s)
+  auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+  Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+                                         Orig->getOperand(2));
+  // create our vector
+  Value *LowBits = Builder.CreateExtractValue(Split, 0);
+  Value *HighBits = Builder.CreateExtractValue(Split, 1);
+  Value *Val;
+  if (ExtractNum == 2) {
+    Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
+    Val = Builder.CreateInsertElement(Val, LowBits, (uint64_t)0);
+    Val = Builder.CreateInsertElement(Val, HighBits, 1);
+  } else
+    Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+
+  Builder.CreateIntrinsic(Builder.getVoidTy(),
+                          Intrinsic::dx_resource_store_typedbuffer,
+                          {Orig->getOperand(0), Orig->getOperand(1), Val});
+}
+
 static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
   if (ClampIntrinsic == Intrinsic::dx_uclamp)
     return Intrinsic::umax;
@@ -660,6 +743,14 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_radians:
     Result = expandRadiansIntrinsic(Orig);
     break;
+  case Intrinsic::dx_resource_load_typedbuffer:
+    expandTypedBufferLoadIntrinsic(Orig);
+    Orig->eraseFromParent();
+    return true;
+  case Intrinsic::dx_resource_store_typedbuffer:
+    expandTypedBufferStoreIntrinsic(Orig);
+    Orig->eraseFromParent();
+    return true;
   case Intrinsic::usub_sat:
     Result = expandUsubSat(Orig);
     break;
diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll
index 6d5146a9026ce..96bfbb8db95ce 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoad.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll
@@ -197,4 +197,36 @@ define void @loadv4i16() {
   ret void
 }
 
+define void @loadf64() {
+  ; Show that DXIL op lowering handles a typedbuffer load whose resource element type is double while the loaded value is <2 x i32>.
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+  %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false)
+
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 266 }) #0
+  %load = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+  %val = extractvalue { <2 x i32>, i1 } %load, 0
+  ret void
+}
+
+define void @loadv2f64() {
+  ; Show that DXIL op lowering handles a typedbuffer load whose resource element type is <2 x double> while the loaded value is <4 x i32>.
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+  %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false)
+
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 522 }) #0
+  %load = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+  ; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+  %val = extractvalue { <4 x i32>, i1 } %load, 0
+  ret void
+}
+
 ; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
new file mode 100644
index 0000000000000..53ed74b9868e4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @loadf64() {
+  ; check the handle from binding is unchanged
+  ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+  %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false)
+
+  ; check we load an <2 x i32> instead of a double
+  ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+  ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)	
+  %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+  ; check we extract the two i32 and construct a double
+  ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+  ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
+  ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
+  ; CHECK: call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+  %data0 = extractvalue {double, i1} %load0, 0
+  ret void
+}
+
+define void @loadv2f64() {
+  ; check the handle from binding is unchanged
+  ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+  %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false)
+
+  ; check we load an <4 x i32> instead of a double2
+  ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
+  ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(
+  ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0)
+  %load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+  ; check we extract the 4 i32 and construct a <2 x double>
+  ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
+  ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i64 0
+  ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i64 1
+  ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i64 2
+  ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i64 3
+  ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
+  ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
+  ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
+  ; CHECK: insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+  %data0 = extractvalue { <2 x double>, i1 } %load0, 0
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
index 363a3c723bfd5..e21047c9296d1 100644
--- a/llvm/test/CodeGen/DirectX/BufferStore.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -161,3 +161,44 @@ define void @store_scalarized_floats(float %data0, float %data1, float %data2, f
 
   ret void
 }
+
+define void @storef64(<2 x i32> %0) {
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+  ; Storing <2 x i32> to a double typedbuffer: the two unused store components repeat the first element.
+  %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; The temporary casts should all have been cleaned up
+  ; CHECK-NOT: %dx.resource.casthandle
+
+  ; CHECK: [[D0:%.*]] = extractelement <2 x i32> %0, i32 0
+  ; CHECK: [[D1:%.*]] = extractelement <2 x i32> %0, i32 1
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D0]], i32 [[D0]], i8 15)
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, <2 x i32> %0)
+  ret void
+}
+
+define void @storev2f64(<4 x i32> %0) {
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+  ; Storing <4 x i32> to a <2 x double> typedbuffer: all four store components come from the vector.
+  %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; The temporary casts should all have been cleaned up
+  ; CHECK-NOT: %dx.resource.casthandle
+
+  ; CHECK: [[D0:%.*]] = extractelement <4 x i32> %0, i32 0
+  ; CHECK: [[D1:%.*]] = extractelement <4 x i32> %0, i32 1
+  ; CHECK: [[D2:%.*]] = extractelement <4 x i32> %0, i32 2
+  ; CHECK: [[D3:%.*]] = extractelement <4 x i32> %0, i32 3
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D2]], i32 [[D3]], i8 15)
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+      <4 x i32> %0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
new file mode 100644
index 0000000000000..bb4dbb5efb593
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storef64(double %0) {
+  ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
+  %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  ; check we split the double into lo and hi i32 halves and store them as a <2 x i32>
+  ; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
+  ; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
+  ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
+  ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i64 0
+  ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i64 1
+  ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
+  ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
+      double %0)
+  ret void
+}
+
+
+define void @storev2f64(<2 x double> %0) {
+  ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false)
+  %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false)
+  ; check we split the <2 x double> and interleave the lo and hi halves into a <4 x i32>
+  ; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> }
+  ; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0)
+  ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
+  ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
+  ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
+  ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+      <2 x double> %0)
+  ret void
+}

>From 39d443dc82a10199cfc23d38de5dc1b9f4e44e1b Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Thu, 15 May 2025 08:51:42 -0700
Subject: [PATCH 2/3] respond to pr feedback

---
 .../Target/DirectX/DXILIntrinsicExpansion.cpp | 69 ++++++++++---------
 llvm/test/CodeGen/DirectX/BufferLoadDouble.ll | 53 +++++++++++++-
 2 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index bfa41b36166aa..d796efeb004bb 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -541,20 +541,27 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
   return Builder.CreateFMul(X, PiOver180);
 }
 
-static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+static Value *expandTypedBufferLoadIntrinsic(CallInst *Orig) {
   IRBuilder<> Builder(Orig);
 
-  unsigned ExtractNum =
-      Orig->getType()->getStructElementType(0)->isVectorTy() ? 4 : 2;
+  Type *BufferTy = Orig->getType()->getStructElementType(0);
+
+  unsigned ExtractNum = 2;
+  if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+    assert(VT->getNumElements() == 2 &&
+           "TypedBufferLoad double vector has wrong size");
+    ExtractNum = 4;
+  }
+
   Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
 
   Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
-  auto *X =
+  CallInst *Load =
       Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
                               {Orig->getOperand(0), Orig->getOperand(1)});
 
-  // create new extract value
-  Value *Extract = Builder.CreateExtractValue(X, {0});
+  // extract the buffer load's result
+  Value *Extract = Builder.CreateExtractValue(Load, {0});
 
   SmallVector<Value *> ExtractElements;
   for (unsigned I = 0; I < ExtractNum; ++I)
@@ -562,8 +569,7 @@ static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
         Builder.CreateExtractElement(Extract, (uint64_t)I));
 
   // combine into double(s)
-  Value *Result =
-      PoisonValue::get(VectorType::get(Builder.getDoubleTy(), 2, false));
+  Value *Result = PoisonValue::get(BufferTy);
   for (unsigned I = 0; I < ExtractNum; I += 2) {
     Value *Dbl =
         Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
@@ -574,26 +580,29 @@ static void expandTypedBufferLoadIntrinsic(CallInst *Orig) {
       Result = Dbl;
   }
 
-  assert(Orig->hasOneUser() && "TypedBufferLoad is expected to have one user");
-  auto *U = Orig->user_back();
-  auto *OldExtract = dyn_cast<ExtractValueInst>(U);
-  if (!OldExtract)
-    llvm_unreachable("TypedBufferLoad's only users should be ExtractValueInst");
-  OldExtract->replaceAllUsesWith(Result);
-  OldExtract->eraseFromParent();
+  Value *CheckBit = Builder.CreateExtractValue(Load, {1});
+
+  Value *Struct = PoisonValue::get(Orig->getType());
+  Struct = Builder.CreateInsertValue(Struct, Result, {0});
+  Struct = Builder.CreateInsertValue(Struct, CheckBit, {1});
+  return Struct;
 }
 
-void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+static Value *expandTypedBufferStoreIntrinsic(CallInst *Orig) {
   IRBuilder<> Builder(Orig);
 
-  unsigned ExtractNum =
-      Orig->getFunctionType()->getParamType(2)->isVectorTy() ? 4 : 2;
+  Type *BufferTy = Orig->getFunctionType()->getParamType(2);
+
+  unsigned ExtractNum = 2;
+  if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+    assert(VT->getNumElements() == 2 &&
+           "TypedBufferStore double vector has wrong size");
+    ExtractNum = 4;
+  }
+
   Type *SplitElementTy = Builder.getInt32Ty();
-  SmallVector<int> Mask = {0, 1};
-  if (ExtractNum == 4) {
+  if (ExtractNum == 4)
     SplitElementTy = VectorType::get(SplitElementTy, 2, false);
-    Mask = {0, 2, 1, 3};
-  }
 
   // split our double(s)
   auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
@@ -610,9 +619,9 @@ void expandTypedBufferStoreIntrinsic(CallInst *Orig) {
   } else
     Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
 
-  Builder.CreateIntrinsic(Builder.getVoidTy(),
-                          Intrinsic::dx_resource_store_typedbuffer,
-                          {Orig->getOperand(0), Orig->getOperand(1), Val});
+  return Builder.CreateIntrinsic(
+      Builder.getVoidTy(), Intrinsic::dx_resource_store_typedbuffer,
+      {Orig->getOperand(0), Orig->getOperand(1), Val});
 }
 
 static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
@@ -744,13 +753,11 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
     Result = expandRadiansIntrinsic(Orig);
     break;
   case Intrinsic::dx_resource_load_typedbuffer:
-    expandTypedBufferLoadIntrinsic(Orig);
-    Orig->eraseFromParent();
-    return true;
+    Result = expandTypedBufferLoadIntrinsic(Orig);
+    break;
   case Intrinsic::dx_resource_store_typedbuffer:
-    expandTypedBufferStoreIntrinsic(Orig);
-    Orig->eraseFromParent();
-    return true;
+    Result = expandTypedBufferStoreIntrinsic(Orig);
+    break;
   case Intrinsic::usub_sat:
     Result = expandUsubSat(Orig);
     break;
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
index 53ed74b9868e4..5e9f3979e9ef3 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -11,7 +11,10 @@ define void @loadf64() {
       @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
           i32 0, i32 1, i32 1, i32 0, i1 false)
 
+  ret void
+
   ; check we load an <2 x i32> instead of a double
+  ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
   ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
   ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
   ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)	
@@ -22,7 +25,12 @@ define void @loadf64() {
   ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
   ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
   ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
-  ; CHECK: call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+  ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+  ; construct a new {double, i1}
+  ; CHECK: [[CB:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 1
+  ; CHECK: [[S1:%.*]] = insertvalue { double, i1 } poison, double [[DBL]], 0
+  ; CHECK: [[S2:%.*]] = insertvalue { double, i1 } [[S1]], i1 [[CB]], 1
+  ; CHECK: extractvalue { double, i1 } [[S2]], 0
   %data0 = extractvalue {double, i1} %load0, 0
   ret void
 }
@@ -52,7 +60,48 @@ define void @loadv2f64() {
   ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
   ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i64 0
   ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
-  ; CHECK: insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+  ; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i64 1
+  ; construct a new {<2 x double>, i1}
+  ; CHECK: [[CB:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 1
+  ; CHECK: [[S1:%.*]] = insertvalue { <2 x double>, i1 } poison, <2 x double> [[Vec2]], 0
+  ; CHECK: [[S2:%.*]] = insertvalue { <2 x double>, i1 } [[S1]], i1 [[CB]], 1
+  ; CHECK: extractvalue { <2 x double>, i1 } [[S2]], 0
   %data0 = extractvalue { <2 x double>, i1 } %load0, 0
   ret void
 }
+
+; show we properly handle extracting the check bit
+define void @loadf64WithCheckBit() {
+  ; check the handle from binding is unchanged
+  ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false)
+  %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false)
+
+  ret void
+  ; NOTE(review): the 'ret void' above ends the entry block, so the load below appears to sit in a separate unreachable block - confirm this is intended.
+  ; check we load an <2 x i32> instead of a double
+  ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.typedbuffer
+  ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+  ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
+  %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+  ; check we extract the two i32 and construct a double
+  ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+  ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i64 0
+  ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i64 1
+  ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+  ; construct a new {double, i1}
+  ; CHECK: [[CB:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 1
+  ; CHECK: [[S1:%.*]] = insertvalue { double, i1 } poison, double [[DBL]], 0
+  ; CHECK: [[S2:%.*]] = insertvalue { double, i1 } [[S1]], i1 [[CB]], 1
+  ; CHECK: extractvalue { double, i1 } [[S2]], 0
+  %data0 = extractvalue {double, i1} %load0, 0
+  ; CHECK: extractvalue { double, i1 } [[S2]], 1
+  %cb = extractvalue {double, i1} %load0, 1
+  ret void
+}
\ No newline at end of file

>From 730bc337d89f40cf194c80acdd8589a5f5607505 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Thu, 15 May 2025 11:36:00 -0700
Subject: [PATCH 3/3] update comments

---
 llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index d796efeb004bb..cbec0b31bca3c 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -70,14 +70,14 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::vector_reduce_add:
   case Intrinsic::vector_reduce_fadd:
     return true;
-  case Intrinsic::dx_resource_load_typedbuffer: // want to transform double and
-                                                // double2
+  case Intrinsic::dx_resource_load_typedbuffer:
+    // We need to handle doubles and vectors of doubles.
     return F.getReturnType()
         ->getStructElementType(0)
         ->getScalarType()
         ->isDoubleTy();
-  case Intrinsic::dx_resource_store_typedbuffer: // want to transform double and
-                                                 // double2
+  case Intrinsic::dx_resource_store_typedbuffer:
+    // We need to handle doubles and vectors of doubles.
     return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
   }
   return false;



More information about the llvm-commits mailing list