[llvm] [DirectX] add support for i64 buffer load/stores (PR #145047)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 11:18:17 PDT 2025
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/145047
>From c7810528ae60427ec487db4ffb68a288920f5efb Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Wed, 18 Jun 2025 11:53:52 -0400
Subject: [PATCH 1/3] [DirectX] add support for i64 buffer load/stores
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 179 +++++++++++++-----
llvm/test/CodeGen/DirectX/BufferLoadDouble.ll | 4 +-
llvm/test/CodeGen/DirectX/BufferLoadInt64.ll | 56 ++++++
.../test/CodeGen/DirectX/BufferStoreDouble.ll | 43 +++++
llvm/test/CodeGen/DirectX/BufferStoreInt64.ll | 46 +++++
5 files changed, 281 insertions(+), 47 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
create mode 100644 llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index f99e8e7ccdc5d..eb9268e78a9ad 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -70,15 +71,17 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
- case Intrinsic::dx_resource_load_typedbuffer:
- // We need to handle doubles and vector of doubles.
- return F.getReturnType()
- ->getStructElementType(0)
- ->getScalarType()
- ->isDoubleTy();
- case Intrinsic::dx_resource_store_typedbuffer:
- // We need to handle doubles and vector of doubles.
- return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
+ case Intrinsic::dx_resource_load_typedbuffer: {
+ // We need to handle i64, doubles, and vectors of them.
+ Type *ScalarTy =
+ F.getReturnType()->getStructElementType(0)->getScalarType();
+ return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
+ }
+ case Intrinsic::dx_resource_store_typedbuffer: {
+ // We need to handle i64 and doubles and vectors of i64 and doubles.
+ Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType();
+ return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
+ }
}
return false;
}
@@ -545,13 +548,15 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getType()->getStructElementType(0);
- assert(BufferTy->getScalarType()->isDoubleTy() &&
- "Only expand double or double2");
+ Type *ScalarTy = BufferTy->getScalarType();
+ bool IsDouble = ScalarTy->isDoubleTy();
+ assert(IsDouble || ScalarTy->isIntegerTy(64) &&
+ "Only expand double or int64 scalars or vectors");
unsigned ExtractNum = 2;
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
assert(VT->getNumElements() == 2 &&
- "TypedBufferLoad double vector has wrong size");
+ "TypedBufferLoad vector must be size 2");
ExtractNum = 4;
}
@@ -570,22 +575,54 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
ExtractElements.push_back(
Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
- // combine into double(s)
+ // combine into double(s) or int64(s)
Value *Result = PoisonValue::get(BufferTy);
for (unsigned I = 0; I < ExtractNum; I += 2) {
- Value *Dbl =
- Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
- {ExtractElements[I], ExtractElements[I + 1]});
+ Value *Combined = nullptr;
+ if (IsDouble) {
+ // For doubles, use dx_asdouble intrinsic
+ Combined =
+ Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+ {ExtractElements[I], ExtractElements[I + 1]});
+ } else {
+ // For int64, manually combine two int32s
+ // First, zero-extend both values to i64
+ Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
+ Value *Hi =
+ Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
+ // Shift the high bits left by 32 bits
+ Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
+ // OR the high and low bits together
+ Combined = Builder.CreateOr(Lo, ShiftedHi);
+ }
+
if (ExtractNum == 4)
- Result =
- Builder.CreateInsertElement(Result, Dbl, Builder.getInt32(I / 2));
+ Result = Builder.CreateInsertElement(Result, Combined,
+ Builder.getInt32(I / 2));
else
- Result = Dbl;
+ Result = Combined;
}
Value *CheckBit = nullptr;
for (User *U : make_early_inc_range(Orig->users())) {
- auto *EVI = cast<ExtractValueInst>(U);
+ if (auto *Ret = dyn_cast<ReturnInst>(U)) {
+ // For return instructions, we need to handle the case where the function
+ // is directly returning the result of the call
+ Type *RetTy = Ret->getFunction()->getReturnType();
+ Value *StructRet = PoisonValue::get(RetTy);
+ StructRet = Builder.CreateInsertValue(StructRet, Result, {0});
+ Value *CheckBitForRet = Builder.CreateExtractValue(Load, {1});
+ StructRet = Builder.CreateInsertValue(StructRet, CheckBitForRet, {1});
+ Ret->setOperand(0, StructRet);
+ continue;
+ }
+ auto *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI) {
+ // If it's not a ReturnInst or ExtractValueInst, we don't know how to
+ // handle it
+ llvm_unreachable("Unexpected user of typedbufferload");
+ }
+
ArrayRef<unsigned> Indices = EVI->getIndices();
assert(Indices.size() == 1);
@@ -609,38 +646,90 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getFunctionType()->getParamType(2);
- assert(BufferTy->getScalarType()->isDoubleTy() &&
- "Only expand double or double2");
+ Type *ScalarTy = BufferTy->getScalarType();
+ bool IsDouble = ScalarTy->isDoubleTy();
+ assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
+ "Only expand double or int64 scalars or vectors");
unsigned ExtractNum = 2;
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
assert(VT->getNumElements() == 2 &&
- "TypedBufferStore double vector has wrong size");
+ "TypedBufferStore vector must be size 2");
ExtractNum = 4;
}
+ if (IsDouble) {
+ Type *SplitElementTy = Builder.getInt32Ty();
+ if (ExtractNum == 4)
+ SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+
+ // Handle double type(s) - keep original behavior
+ auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+ Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+ {Orig->getOperand(2)});
+ // create our vector
+ Value *LowBits = Builder.CreateExtractValue(Split, 0);
+ Value *HighBits = Builder.CreateExtractValue(Split, 1);
+ Value *Val;
+ if (ExtractNum == 2) {
+ Val = PoisonValue::get(VectorType::get(Builder.getInt32Ty(), 2, false));
+ Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+ Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+ } else
+ Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+
+ Builder.CreateIntrinsic(Builder.getVoidTy(),
+ Intrinsic::dx_resource_store_typedbuffer,
+ {Orig->getOperand(0), Orig->getOperand(1), Val});
+ } else {
+ // Handle int64 type(s)
+ Value *InputVal = Orig->getOperand(2);
+ Value *Val;
+
+ if (ExtractNum == 4) {
+ // Handle vector of int64
+ Type *Int32x4Ty = VectorType::get(Builder.getInt32Ty(), 4, false);
+ Val = PoisonValue::get(Int32x4Ty);
+
+ for (unsigned I = 0; I < 2; ++I) {
+ // Extract each int64 element
+ Value *Int64Val =
+ Builder.CreateExtractElement(InputVal, Builder.getInt32(I));
+
+ // Get low 32 bits by truncating to i32
+ Value *LowBits = Builder.CreateTrunc(Int64Val, Builder.getInt32Ty());
+
+ // Get high 32 bits by shifting right by 32 and truncating
+ Value *ShiftedVal = Builder.CreateLShr(Int64Val, Builder.getInt64(32));
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Builder.getInt32Ty());
+
+ // Insert into our final vector
+ Val =
+ Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(I * 2));
+ Val = Builder.CreateInsertElement(Val, HighBits,
+ Builder.getInt32(I * 2 + 1));
+ }
+ } else {
+ // Handle scalar int64
+ Type *Int32x2Ty = VectorType::get(Builder.getInt32Ty(), 2, false);
+ Val = PoisonValue::get(Int32x2Ty);
+
+ // Get low 32 bits by truncating to i32
+ Value *LowBits = Builder.CreateTrunc(InputVal, Builder.getInt32Ty());
+
+ // Get high 32 bits by shifting right by 32 and truncating
+ Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32));
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Builder.getInt32Ty());
+
+ // Insert into our final vector
+ Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+ Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+ }
+
+ Builder.CreateIntrinsic(Builder.getVoidTy(),
+ Intrinsic::dx_resource_store_typedbuffer,
+ {Orig->getOperand(0), Orig->getOperand(1), Val});
+ }
- Type *SplitElementTy = Builder.getInt32Ty();
- if (ExtractNum == 4)
- SplitElementTy = VectorType::get(SplitElementTy, 2, false);
-
- // split our double(s)
- auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
- Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
- Orig->getOperand(2));
- // create our vector
- Value *LowBits = Builder.CreateExtractValue(Split, 0);
- Value *HighBits = Builder.CreateExtractValue(Split, 1);
- Value *Val;
- if (ExtractNum == 2) {
- Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
- Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
- Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
- } else
- Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
-
- Builder.CreateIntrinsic(Builder.getVoidTy(),
- Intrinsic::dx_resource_store_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1), Val});
Orig->eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
index 80a071a66364b..af3ec9df37967 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
-target triple = "dxil-pc-shadermodel6.6-compute"
+target triple = "dxil-pc-shadermodel6.2-compute"
define void @loadf64() {
; check the handle from binding is unchanged
@@ -88,4 +88,4 @@ define void @loadf64WithCheckBit() {
; CHECK-NOT: extractvalue { double, i1 }
%cb = extractvalue {double, i1} %load0, 1
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll b/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
new file mode 100644
index 0000000000000..cea475524945c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.2-compute"
+
+define { i64, i1 } @loadi64() {
+; CHECK-LABEL: define { i64, i1 } @loadi64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
+; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { i64, i1 } [[TMP9]], i1 [[TMP10]], 1
+; CHECK-NEXT: ret { i64, i1 } [[TMP11]]
+;
+ %buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { i64, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_i64_1_0_0t(
+ target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 0)
+ ret { i64, i1 } %result
+}
+
+define { <2 x i64>, i1 } @loadv2i64() {
+; CHECK-LABEL: define { <2 x i64>, i1 } @loadv2i64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2i64_1_0_0t(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 32
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 32
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP15]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, i1 } poison, <2 x i64> [[TMP16]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <2 x i64>, i1 } [[TMP17]], i1 [[TMP18]], 1
+; CHECK-NEXT: ret { <2 x i64>, i1 } [[TMP19]]
+;
+ %buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { <2 x i64>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t(
+ target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) %buffer, i32 0)
+ ret { <2 x i64>, i1 } %result
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
index 9c3dab0cc1e46..882948b6dce74 100644
--- a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -45,3 +45,46 @@ define void @storev2f64(<2 x double> %0) {
<2 x double> %0)
ret void
}
+
+define { double, i1 } @loadAndReturnf64() {
+; CHECK-LABEL: define { double, i1 } @loadAndReturnf64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(target("dx.TypedBuffer", double, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { double, i1 } poison, double [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP6]], i1 [[TMP7]], 1
+; CHECK-NEXT: ret { double, i1 } [[TMP8]]
+;
+ %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { double, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_f64_1_0_0t(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+ ret { double, i1 } %result
+}
+
+define { <2 x double>, i1 } @loadAndReturnv2f64() {
+; CHECK-LABEL: define { <2 x double>, i1 } @loadAndReturnv2f64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP5]], i32 [[TMP6]])
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP8]], double [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <2 x double>, i1 } poison, <2 x double> [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x double>, i1 } [[TMP11]], i1 [[TMP12]], 1
+; CHECK-NEXT: ret { <2 x double>, i1 } [[TMP13]]
+;
+ %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+ ret { <2 x double>, i1 } %result
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
new file mode 100644
index 0000000000000..efb7c0ac104ed
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storei64(i64 %0) {
+; CHECK-LABEL: define void @storei64(
+; CHECK-SAME: i64 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 32
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP6]])
+; CHECK-NEXT: ret void
+;
+ %buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 0,i64 %0)
+ ret void
+}
+
+
+define void @storev2i64(<2 x i64> %0) {
+; CHECK-LABEL: define void @storev2i64(
+; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 32
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP8]], 32
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP9]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP11]], i32 3
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP13]])
+; CHECK-NEXT: ret void
+;
+ %buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) %buffer, i32 0, <2 x i64> %0)
+ ret void
+}
>From e7d5e228c41ee4b77f3ed2d86489c73c6c8d8268 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 20 Jun 2025 10:58:21 -0400
Subject: [PATCH 2/3] minimize code diff between double and i64
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 74 +++++++++----------
1 file changed, 33 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index eb9268e78a9ad..45d8e497165cf 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -651,58 +651,56 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
"Only expand double or int64 scalars or vectors");
- unsigned ExtractNum = 2;
- if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
- assert(VT->getNumElements() == 2 &&
+ // Determine if we're dealing with a vector or scalar
+ bool IsVector = isa<FixedVectorType>(BufferTy);
+ if (IsVector) {
+ assert(cast<FixedVectorType>(BufferTy)->getNumElements() == 2 &&
"TypedBufferStore vector must be size 2");
- ExtractNum = 4;
}
+
+ // Create the appropriate vector type for the result
+ Type *Int32Ty = Builder.getInt32Ty();
+ Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
+ Value *Val = PoisonValue::get(ResultTy);
+
+ // Split the 64-bit values into 32-bit components
if (IsDouble) {
- Type *SplitElementTy = Builder.getInt32Ty();
- if (ExtractNum == 4)
+ // Handle double type(s)
+ Type *SplitElementTy = Int32Ty;
+ if (IsVector)
SplitElementTy = VectorType::get(SplitElementTy, 2, false);
- // Handle double type(s) - keep original behavior
auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
{Orig->getOperand(2)});
- // create our vector
Value *LowBits = Builder.CreateExtractValue(Split, 0);
Value *HighBits = Builder.CreateExtractValue(Split, 1);
- Value *Val;
- if (ExtractNum == 2) {
- Val = PoisonValue::get(VectorType::get(Builder.getInt32Ty(), 2, false));
+
+ if (IsVector) {
+ // For vector doubles, use shuffle to create the final vector
+ Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+ } else {
+ // For scalar doubles, insert the elements
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
- } else
- Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
-
- Builder.CreateIntrinsic(Builder.getVoidTy(),
- Intrinsic::dx_resource_store_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1), Val});
+ }
} else {
// Handle int64 type(s)
Value *InputVal = Orig->getOperand(2);
- Value *Val;
- if (ExtractNum == 4) {
+ if (IsVector) {
// Handle vector of int64
- Type *Int32x4Ty = VectorType::get(Builder.getInt32Ty(), 4, false);
- Val = PoisonValue::get(Int32x4Ty);
-
for (unsigned I = 0; I < 2; ++I) {
// Extract each int64 element
Value *Int64Val =
Builder.CreateExtractElement(InputVal, Builder.getInt32(I));
- // Get low 32 bits by truncating to i32
- Value *LowBits = Builder.CreateTrunc(Int64Val, Builder.getInt32Ty());
-
- // Get high 32 bits by shifting right by 32 and truncating
+ // Split into low and high 32-bit parts
+ Value *LowBits = Builder.CreateTrunc(Int64Val, Int32Ty);
Value *ShiftedVal = Builder.CreateLShr(Int64Val, Builder.getInt64(32));
- Value *HighBits = Builder.CreateTrunc(ShiftedVal, Builder.getInt32Ty());
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
- // Insert into our final vector
+ // Insert into result vector
Val =
Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(I * 2));
Val = Builder.CreateInsertElement(Val, HighBits,
@@ -710,26 +708,20 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
}
} else {
// Handle scalar int64
- Type *Int32x2Ty = VectorType::get(Builder.getInt32Ty(), 2, false);
- Val = PoisonValue::get(Int32x2Ty);
-
- // Get low 32 bits by truncating to i32
- Value *LowBits = Builder.CreateTrunc(InputVal, Builder.getInt32Ty());
-
- // Get high 32 bits by shifting right by 32 and truncating
+ Value *LowBits = Builder.CreateTrunc(InputVal, Int32Ty);
Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32));
- Value *HighBits = Builder.CreateTrunc(ShiftedVal, Builder.getInt32Ty());
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
- // Insert into our final vector
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
}
-
- Builder.CreateIntrinsic(Builder.getVoidTy(),
- Intrinsic::dx_resource_store_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1), Val});
}
+ // Create the final intrinsic call
+ Builder.CreateIntrinsic(Builder.getVoidTy(),
+ Intrinsic::dx_resource_store_typedbuffer,
+ {Orig->getOperand(0), Orig->getOperand(1), Val});
+
Orig->eraseFromParent();
return true;
}
>From f7b3d844001508f4bb8cd58d9af0ae1c76e8daf9 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 20 Jun 2025 14:16:19 -0400
Subject: [PATCH 3/3] remove return handling
---
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 18 ++------
llvm/test/CodeGen/DirectX/BufferLoadDouble.ll | 2 +-
llvm/test/CodeGen/DirectX/BufferLoadInt64.ll | 28 +++++-------
.../test/CodeGen/DirectX/BufferStoreDouble.ll | 43 -------------------
4 files changed, 14 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 45d8e497165cf..d50279461800e 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -605,23 +605,11 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
Value *CheckBit = nullptr;
for (User *U : make_early_inc_range(Orig->users())) {
- if (auto *Ret = dyn_cast<ReturnInst>(U)) {
- // For return instructions, we need to handle the case where the function
- // is directly returning the result of the call
- Type *RetTy = Ret->getFunction()->getReturnType();
- Value *StructRet = PoisonValue::get(RetTy);
- StructRet = Builder.CreateInsertValue(StructRet, Result, {0});
- Value *CheckBitForRet = Builder.CreateExtractValue(Load, {1});
- StructRet = Builder.CreateInsertValue(StructRet, CheckBitForRet, {1});
- Ret->setOperand(0, StructRet);
- continue;
- }
+ // If it's not an ExtractValueInst, we don't know how to
+ // handle it
auto *EVI = dyn_cast<ExtractValueInst>(U);
- if (!EVI) {
- // If it's not a ReturnInst or ExtractValueInst, we don't know how to
- // handle it
+ if (!EVI)
llvm_unreachable("Unexpected user of typedbufferload");
- }
ArrayRef<unsigned> Indices = EVI->getIndices();
assert(Indices.size() == 1);
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
index af3ec9df37967..25abf2111060c 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
-target triple = "dxil-pc-shadermodel6.2-compute"
+target triple = "dxil-pc-shadermodel6.6-compute"
define void @loadf64() {
; check the handle from binding is unchanged
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll b/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
index cea475524945c..42c0012ff3475 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
@@ -3,8 +3,8 @@
target triple = "dxil-pc-shadermodel6.2-compute"
-define { i64, i1 } @loadi64() {
-; CHECK-LABEL: define { i64, i1 } @loadi64() {
+define void @loadi64() {
+; CHECK-LABEL: define void @loadi64() {
; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0)
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 0
@@ -14,19 +14,15 @@ define { i64, i1 } @loadi64() {
; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP8]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { i64, i1 } [[TMP9]], i1 [[TMP10]], 1
-; CHECK-NEXT: ret { i64, i1 } [[TMP11]]
+; CHECK-NEXT: ret void
;
%buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
- %result = call { i64, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_i64_1_0_0t(
- target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 0)
- ret { i64, i1 } %result
+ %result = call { i64, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 0)
+ ret void
}
-define { <2 x i64>, i1 } @loadv2i64() {
-; CHECK-LABEL: define { <2 x i64>, i1 } @loadv2i64() {
+define void @loadv2i64() {
+; CHECK-LABEL: define void @loadv2i64() {
; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2i64_1_0_0t(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0)
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 0
@@ -44,13 +40,9 @@ define { <2 x i64>, i1 } @loadv2i64() {
; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 32
; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP15]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, i1 } poison, <2 x i64> [[TMP16]], 0
-; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <2 x i64>, i1 } [[TMP17]], i1 [[TMP18]], 1
-; CHECK-NEXT: ret { <2 x i64>, i1 } [[TMP19]]
+; CHECK-NEXT: ret void
;
%buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
- %result = call { <2 x i64>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t(
- target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) %buffer, i32 0)
- ret { <2 x i64>, i1 } %result
+ %result = call { <2 x i64>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) %buffer, i32 0)
+ ret void
}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
index 882948b6dce74..9c3dab0cc1e46 100644
--- a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -45,46 +45,3 @@ define void @storev2f64(<2 x double> %0) {
<2 x double> %0)
ret void
}
-
-define { double, i1 } @loadAndReturnf64() {
-; CHECK-LABEL: define { double, i1 } @loadAndReturnf64() {
-; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
-; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(target("dx.TypedBuffer", double, 1, 0, 0) [[BUFFER]], i32 0)
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP3]], i32 [[TMP4]])
-; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { double, i1 } poison, double [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP6]], i1 [[TMP7]], 1
-; CHECK-NEXT: ret { double, i1 } [[TMP8]]
-;
- %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
- %result = call { double, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_f64_1_0_0t(
- target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
- ret { double, i1 } %result
-}
-
-define { <2 x double>, i1 } @loadAndReturnv2f64() {
-; CHECK-LABEL: define { <2 x double>, i1 } @loadAndReturnv2f64() {
-; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
-; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[BUFFER]], i32 0)
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP3]], i32 [[TMP4]])
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP5]], i32 [[TMP6]])
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP8]], double [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <2 x double>, i1 } poison, <2 x double> [[TMP10]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x double>, i1 } [[TMP11]], i1 [[TMP12]], 1
-; CHECK-NEXT: ret { <2 x double>, i1 } [[TMP13]]
-;
- %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
- %result = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t(
- target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
- ret { <2 x double>, i1 } %result
-}
More information about the llvm-commits mailing list