[llvm] 7f34d3a - [DirectX] Add support for typedBufferLoad and Store for RWBuffer<double2> and RWBuffer<double> (#139996)

Fri May 30 08:16:22 PDT 2025

Author: Sarah Spall
Date: 2025-05-30T08:16:19-07:00
New Revision: 7f34d3acbac3df2bceb5f6450413706bde38908c

URL: https://github.com/llvm/llvm-project/commit/7f34d3acbac3df2bceb5f6450413706bde38908c
DIFF: https://github.com/llvm/llvm-project/commit/7f34d3acbac3df2bceb5f6450413706bde38908c.diff

LOG: [DirectX] Add support for typedBufferLoad and Store for RWBuffer<double2> and RWBuffer<double> (#139996)

typedBufferLoad of double/double2 is expanded to a typedBufferLoad of a
<2 x i32>/<4 x i32> followed by asdouble.
typedBufferStore of a double/double2 is expanded to a splitdouble followed
by a typedBufferStore of a <2 x i32>/<4 x i32>.
Add tests showing the result of intrinsic expansion for typedBufferLoad and
typedBufferStore.
Add tests showing DXIL op lowering can handle typedBufferLoad and
typedBufferStore where the target type doesn't match the typedBufferLoad
and typedBufferStore type.
Closes #104423

Added: 
    llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
    llvm/test/CodeGen/DirectX/BufferStoreDouble.ll

Modified: 
    llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
    llvm/test/CodeGen/DirectX/BufferLoad.ll
    llvm/test/CodeGen/DirectX/BufferStore.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index cff8d637dcb87..f99e8e7ccdc5d 100644

--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -70,6 +70,15 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::vector_reduce_add:
   case Intrinsic::vector_reduce_fadd:
     return true;
+  case Intrinsic::dx_resource_load_typedbuffer:
+    // We need to handle doubles and vector of doubles.
+    return F.getReturnType()
+        ->getStructElementType(0)
+        ->getScalarType()
+        ->isDoubleTy();
+  case Intrinsic::dx_resource_store_typedbuffer:
+    // We need to handle doubles and vector of doubles.
+    return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
   }
   return false;
 }
@@ -532,6 +541,110 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
   return Builder.CreateFMul(X, PiOver180);
 }
 
+static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig);
+  // Rewrite a double/double2 load as an i32-vector load plus dx.asdouble.
+  Type *BufferTy = Orig->getType()->getStructElementType(0);
+  assert(BufferTy->getScalarType()->isDoubleTy() &&
+         "Only expand double or double2");
+  // Two i32 lanes are loaded per double: 2 for double, 4 for double2.
+  unsigned ExtractNum = 2;
+  if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+    assert(VT->getNumElements() == 2 &&
+           "TypedBufferLoad double vector has wrong size");
+    ExtractNum = 4;
+  }
+
+  Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
+  // The replacement load returns { <N x i32>, i1 } and reuses both operands.
+  Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
+  CallInst *Load =
+      Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
+                              {Orig->getOperand(0), Orig->getOperand(1)});
+
+  // Pull the <N x i32> payload (struct field 0) out of the new load.
+  Value *Extract = Builder.CreateExtractValue(Load, {0});
+
+  SmallVector<Value *> ExtractElements;
+  for (unsigned I = 0; I < ExtractNum; ++I)
+    ExtractElements.push_back(
+        Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
+
+  // Pair consecutive i32s (I, I + 1) into one double each via dx.asdouble.
+  Value *Result = PoisonValue::get(BufferTy);
+  for (unsigned I = 0; I < ExtractNum; I += 2) {
+    Value *Dbl =
+        Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+                                {ExtractElements[I], ExtractElements[I + 1]});
+    if (ExtractNum == 4)
+      Result =
+          Builder.CreateInsertElement(Result, Dbl, Builder.getInt32(I / 2));
+    else
+      Result = Dbl;
+  }
+  // Rewire users of the old call; cast<> assumes each is an extractvalue.
+  Value *CheckBit = nullptr; // created lazily, at most once
+  for (User *U : make_early_inc_range(Orig->users())) {
+    auto *EVI = cast<ExtractValueInst>(U);
+    ArrayRef<unsigned> Indices = EVI->getIndices();
+    assert(Indices.size() == 1);
+
+    if (Indices[0] == 0) {
+      // Field 0: the loaded value(s) -> the rebuilt double/double2.
+      EVI->replaceAllUsesWith(Result);
+    } else {
+      // Field 1: the check bit -> extractvalue 1 of the replacement load.
+      assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
+      if (!CheckBit)
+        CheckBit = Builder.CreateExtractValue(Load, {1});
+      EVI->replaceAllUsesWith(CheckBit);
+    }
+    EVI->eraseFromParent();
+  }
+  Orig->eraseFromParent();
+  return true;
+}
+
+static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig);
+  // Rewrite a double/double2 store as dx.splitdouble plus an i32-vector store.
+  Type *BufferTy = Orig->getFunctionType()->getParamType(2);
+  assert(BufferTy->getScalarType()->isDoubleTy() &&
+         "Only expand double or double2");
+  // Two i32 lanes are stored per double: 2 for double, 4 for double2.
+  unsigned ExtractNum = 2;
+  if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+    assert(VT->getNumElements() == 2 &&
+           "TypedBufferStore double vector has wrong size");
+    ExtractNum = 4;
+  }
+
+  Type *SplitElementTy = Builder.getInt32Ty();
+  if (ExtractNum == 4)
+    SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+
+  // Split the double(s); the result is a struct of two SplitElementTy fields.
+  auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+  Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+                                         Orig->getOperand(2));
+  // Assemble the i32 vector to store from the two split fields.
+  Value *LowBits = Builder.CreateExtractValue(Split, 0);
+  Value *HighBits = Builder.CreateExtractValue(Split, 1);
+  Value *Val;
+  if (ExtractNum == 2) {
+    Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
+    Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+    Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+  } else // double2: interleave as Low[0], High[0], Low[1], High[1]
+    Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+  // Emit the replacement store, reusing operands 0 and 1 of the original call.
+  Builder.CreateIntrinsic(Builder.getVoidTy(),
+                          Intrinsic::dx_resource_store_typedbuffer,
+                          {Orig->getOperand(0), Orig->getOperand(1), Val});
+  Orig->eraseFromParent();
+  return true;
+}
+
 static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
   if (ClampIntrinsic == Intrinsic::dx_uclamp)
     return Intrinsic::umax;
@@ -660,6 +773,14 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_radians:
     Result = expandRadiansIntrinsic(Orig);
     break;
+  case Intrinsic::dx_resource_load_typedbuffer:
+    if (expandTypedBufferLoadIntrinsic(Orig))
+      return true;
+    break;
+  case Intrinsic::dx_resource_store_typedbuffer:
+    if (expandTypedBufferStoreIntrinsic(Orig))
+      return true;
+    break;
   case Intrinsic::usub_sat:
     Result = expandUsubSat(Orig);
     break;

diff  --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll
index 5678c3941a966..589d551d5ae9e 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoad.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll
@@ -197,4 +197,36 @@ define void @loadv4i16() {
   ret void
 }
 
+define void @loadf64() {
+  ; show dxil op lowering can handle a typedbuffer load where the target is double but the load type is <2 x i32>
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+  %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 266 }) #0
+  %load = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+  ; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+  %val = extractvalue { <2 x i32>, i1 } %load, 0
+  ret void
+}
+
+define void @loadv2f64() {
+  ; show dxil op lower can handle typedbuffer load where target is double2 but load type is <4 x i32>
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) #0
+  %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]], %dx.types.ResourceProperties { i32 4106, i32 522 }) #0
+  %load = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+  ; CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[BA]], i32 0, i32 undef) #1
+  %val = extractvalue { <4 x i32>, i1 } %load, 0
+  ret void
+}
+
 ; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}

diff  --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
new file mode 100644
index 0000000000000..80a071a66364b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -0,0 +1,91 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @loadf64() {
+  ; check the handle from binding is unchanged
+  ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+  %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+  ; check we load an <2 x i32> instead of a double
+  ; CHECK-NOT: call { double, i1 } @llvm.dx.resource.load.typedbuffer
+  ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+  ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
+  %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+  ; check we extract the two i32 and construct a double
+  ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+  ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
+  ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
+  ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+  ; CHECK-NOT: extractvalue { double, i1 }
+  %data0 = extractvalue {double, i1} %load0, 0
+  ret void
+}
+
+define void @loadv2f64() {
+  ; check the handle from binding is unchanged
+  ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+  %buffer = call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+  ; check we load an <4 x i32> instead of a double2
+  ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
+  ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(
+  ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0)
+  %load0 = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+
+  ; check we extract the 4 i32 and construct a <2 x double>
+  ; CHECK: [[D0:%.*]] = extractvalue { <4 x i32>, i1 } [[L0]], 0
+  ; CHECK: [[Lo1:%.*]] = extractelement <4 x i32> [[D0]], i32 0
+  ; CHECK: [[Hi1:%.*]] = extractelement <4 x i32> [[D0]], i32 1
+  ; CHECK: [[Lo2:%.*]] = extractelement <4 x i32> [[D0]], i32 2
+  ; CHECK: [[Hi2:%.*]] = extractelement <4 x i32> [[D0]], i32 3
+  ; CHECK: [[Dbl1:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo1]], i32 [[Hi1]])
+  ; CHECK: [[Vec:%.*]] = insertelement <2 x double> poison, double [[Dbl1]], i32 0
+  ; CHECK: [[Dbl2:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo2]], i32 [[Hi2]])
+  ; CHECK: [[Vec2:%.*]] = insertelement <2 x double> [[Vec]], double [[Dbl2]], i32 1
+  ; CHECK-NOT: extractvalue { <2 x double>, i1 }
+  %data0 = extractvalue { <2 x double>, i1 } %load0, 0
+  ret void
+}
+
+; show we properly handle extracting the check bit
+define void @loadf64WithCheckBit() {
+  ; check the handle from binding is unchanged
+  ; CHECK: [[B:%.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+  %buffer = call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+  ; check we load an <2 x i32> instead of a double
+  ; CHECK-NOT: call { double, i1 } @llvm.dx.resource.load.typedbuffer
+  ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+  ; CHECK-SAME: @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0)
+  %load0 = call {double, i1} @llvm.dx.resource.load.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+
+  ; check we extract the two i32 and construct a double
+  ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+  ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
+  ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
+  ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+  %data0 = extractvalue {double, i1} %load0, 0
+  ; CHECK: extractvalue { <2 x i32>, i1 } [[L0]], 1
+  ; CHECK-NOT: extractvalue { double, i1 }
+  %cb = extractvalue {double, i1} %load0, 1
+  ret void
+}
\ No newline at end of file

diff  --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
index 6c5bf0a6baa08..39d578edb42e3 100644
--- a/llvm/test/CodeGen/DirectX/BufferStore.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -161,3 +161,44 @@ define void @store_scalarized_floats(float %data0, float %data1, float %data2, f
 
   ret void
 }
+
+define void @storef64(<2 x i32> %0) {
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+
+  %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+
+  ; The temporary casts should all have been cleaned up
+  ; CHECK-NOT: %dx.resource.casthandle
+
+  ; CHECK: [[D0:%.*]] = extractelement <2 x i32> %0, i32 0
+  ; CHECK: [[D1:%.*]] = extractelement <2 x i32> %0, i32 1
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D0]], i32 [[D0]], i8 15)
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0, <2 x i32> %0)
+  ret void
+}
+
+define void @storev2f64(<4 x i32> %0) {
+  ; CHECK: [[B1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217,
+  ; CHECK: [[BA:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[B1]]
+  
+  %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+
+  ; The temporary casts should all have been cleaned up
+  ; CHECK-NOT: %dx.resource.casthandle
+
+  ; CHECK: [[D0:%.*]] = extractelement <4 x i32> %0, i32 0
+  ; CHECK: [[D1:%.*]] = extractelement <4 x i32> %0, i32 1
+  ; CHECK: [[D2:%.*]] = extractelement <4 x i32> %0, i32 2
+  ; CHECK: [[D3:%.*]] = extractelement <4 x i32> %0, i32 3
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[BA]], i32 0, i32 undef, i32 [[D0]], i32 [[D1]], i32 [[D2]], i32 [[D3]], i8 15)
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+      <4 x i32> %0)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
new file mode 100644
index 0000000000000..9c3dab0cc1e46
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storef64(double %0) {
+  ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+  %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+
+  ; check we split the double and store the lo and hi bits
+  ; CHECK: [[SD:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
+  ; CHECK: [[Lo:%.*]] = extractvalue { i32, i32 } [[SD]], 0
+  ; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
+  ; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0
+  ; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1
+  ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
+  ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
+      double %0)
+  ret void
+}
+
+
+define void @storev2f64(<2 x double> %0) {
+  ; CHECK: [[B:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+  ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+  ; CHECK-SAME: i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+  %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0)
+      @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(
+          i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+
+  ; CHECK: [[SD:%.*]] = call { <2 x i32>, <2 x i32> }
+  ; CHECK-SAME: @llvm.dx.splitdouble.v2i32(<2 x double> %0)
+  ; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
+  ; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
+  ; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
+  ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
+  call void @llvm.dx.resource.store.typedbuffer(
+      target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
+      <2 x double> %0)
+  ret void
+}