[llvm] [DirectX] Add support for Raw Buffer Loads and Stores for scalars and vectors of doubles and i64s in SM6.2 and earlier (PR #146627)
Sarah Spall via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 3 08:20:11 PDT 2025
================
@@ -544,63 +555,81 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, PiOver180);
}
-static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getType()->getStructElementType(0);
Type *ScalarTy = BufferTy->getScalarType();
bool IsDouble = ScalarTy->isDoubleTy();
assert(IsDouble || ScalarTy->isIntegerTy(64) &&
"Only expand double or int64 scalars or vectors");
+ bool IsVector = isa<FixedVectorType>(BufferTy);
unsigned ExtractNum = 2;
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
- assert(VT->getNumElements() == 2 &&
- "TypedBufferLoad vector must be size 2");
- ExtractNum = 4;
+ if (!IsRaw)
+ assert(VT->getNumElements() == 2 &&
+ "TypedBufferLoad vector must be size 2");
+ ExtractNum = 2 * VT->getNumElements();
}
- Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
-
- Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
- CallInst *Load =
- Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1)});
-
- // extract the buffer load's result
- Value *Extract = Builder.CreateExtractValue(Load, {0});
-
- SmallVector<Value *> ExtractElements;
- for (unsigned I = 0; I < ExtractNum; ++I)
- ExtractElements.push_back(
- Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
-
- // combine into double(s) or int64(s)
+ SmallVector<Value *, 2> Loads;
Value *Result = PoisonValue::get(BufferTy);
- for (unsigned I = 0; I < ExtractNum; I += 2) {
- Value *Combined = nullptr;
- if (IsDouble)
- // For doubles, use dx_asdouble intrinsic
- Combined =
- Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
- {ExtractElements[I], ExtractElements[I + 1]});
- else {
- // For int64, manually combine two int32s
- // First, zero-extend both values to i64
- Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
- Value *Hi =
- Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
- // Shift the high bits left by 32 bits
- Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
- // OR the high and low bits together
- Combined = Builder.CreateOr(Lo, ShiftedHi);
+ unsigned Base = 0;
+ while (ExtractNum > 0) {
+ unsigned LoadNum = std::min(ExtractNum, 4u);
+ Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false);
+
+ Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
+ Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
+ SmallVector<Value *, 3> Args = {Orig->getOperand(0), Orig->getOperand(1)};
+ if (IsRaw) {
+ LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
+ Value *Tmp = Builder.getInt32(4 * Base * 2);
+ Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
}
- if (ExtractNum == 4)
- Result = Builder.CreateInsertElement(Result, Combined,
- Builder.getInt32(I / 2));
- else
- Result = Combined;
+ CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
+ Loads.push_back(Load);
+
+ // extract the buffer load's result
+ Value *Extract = Builder.CreateExtractValue(Load, {0});
+
+ SmallVector<Value *> ExtractElements;
+ for (unsigned I = 0; I < LoadNum; ++I)
+ ExtractElements.push_back(
+ Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
+
+ // combine into double(s) or int64(s)
+ for (unsigned I = 0; I < LoadNum; I += 2) {
+ Value *Combined = nullptr;
+ if (IsDouble)
+ // For doubles, use dx_asdouble intrinsic
+ Combined = Builder.CreateIntrinsic(
+ Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+ {ExtractElements[I], ExtractElements[I + 1]});
+ else {
+ // For int64, manually combine two int32s
+ // First, zero-extend both values to i64
+ Value *Lo =
+ Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
+ Value *Hi =
+ Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
+ // Shift the high bits left by 32 bits
+ Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
+ // OR the high and low bits together
+ Combined = Builder.CreateOr(Lo, ShiftedHi);
+ }
+
+ if (IsVector)
+ Result = Builder.CreateInsertElement(Result, Combined,
+ Builder.getInt32((I / 2) + Base));
----------------
spall wrote:
For an odd-length vector such as a vec3, Result still needs to be a vector. ExtractNum changes during the loop, so it would be 2 by the time we load the third element of a vec3; that is why this check uses IsVector rather than comparing against ExtractNum.
https://github.com/llvm/llvm-project/pull/146627
More information about the llvm-commits mailing list