[llvm] [DirectX] add support for i64 buffer load/stores (PR #145047)
Justin Bogner via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 12:18:50 PDT 2025
================
@@ -609,38 +634,82 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getFunctionType()->getParamType(2);
- assert(BufferTy->getScalarType()->isDoubleTy() &&
- "Only expand double or double2");
-
- unsigned ExtractNum = 2;
- if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
- assert(VT->getNumElements() == 2 &&
- "TypedBufferStore double vector has wrong size");
- ExtractNum = 4;
+ Type *ScalarTy = BufferTy->getScalarType();
+ bool IsDouble = ScalarTy->isDoubleTy();
+ assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
+ "Only expand double or int64 scalars or vectors");
+
+ // Determine if we're dealing with a vector or scalar
+ bool IsVector = isa<FixedVectorType>(BufferTy);
+ if (IsVector) {
+ assert(cast<FixedVectorType>(BufferTy)->getNumElements() == 2 &&
+ "TypedBufferStore vector must be size 2");
}
- Type *SplitElementTy = Builder.getInt32Ty();
- if (ExtractNum == 4)
- SplitElementTy = VectorType::get(SplitElementTy, 2, false);
-
- // split our double(s)
- auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
- Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
- Orig->getOperand(2));
- // create our vector
- Value *LowBits = Builder.CreateExtractValue(Split, 0);
- Value *HighBits = Builder.CreateExtractValue(Split, 1);
- Value *Val;
- if (ExtractNum == 2) {
- Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
- Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
- Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
- } else
- Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+ // Create the appropriate vector type for the result
+ Type *Int32Ty = Builder.getInt32Ty();
+ Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
+ Value *Val = PoisonValue::get(ResultTy);
+
+ // Split the 64-bit values into 32-bit components
+ if (IsDouble) {
+ // Handle double type(s)
+ Type *SplitElementTy = Int32Ty;
+ if (IsVector)
+ SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+
+ auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+ Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+ {Orig->getOperand(2)});
+ Value *LowBits = Builder.CreateExtractValue(Split, 0);
+ Value *HighBits = Builder.CreateExtractValue(Split, 1);
+
+ if (IsVector) {
+ // For vector doubles, use shuffle to create the final vector
+ Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+ } else {
+ // For scalar doubles, insert the elements
+ Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+ Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+ }
+ } else {
+ // Handle int64 type(s)
+ Value *InputVal = Orig->getOperand(2);
+
+ if (IsVector) {
+ // Handle vector of int64
+ for (unsigned I = 0; I < 2; ++I) {
+ // Extract each int64 element
+ Value *Int64Val =
+ Builder.CreateExtractElement(InputVal, Builder.getInt32(I));
+
+ // Split into low and high 32-bit parts
+ Value *LowBits = Builder.CreateTrunc(Int64Val, Int32Ty);
+ Value *ShiftedVal = Builder.CreateLShr(Int64Val, Builder.getInt64(32));
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
+
+ // Insert into result vector
+ Val =
+ Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(I * 2));
+ Val = Builder.CreateInsertElement(Val, HighBits,
+ Builder.getInt32(I * 2 + 1));
+ }
+ } else {
+ // Handle scalar int64
+ Value *LowBits = Builder.CreateTrunc(InputVal, Int32Ty);
+ Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32));
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
----------------
bogner wrote:
I think you can combine the scalar and vector logic if you avoid scalarizing this by hand. That is, you could do something like:
```c++
Constant *ShiftAmt = Builder.getInt64(32);
if (IsVector)
ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt);
// Split into low and high 32-bit parts
Value *LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
Value *HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
```
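Then I think this fits nicely with the logic for doubles above, where we're just changing the conversion operation itself and the surrounding IR is unchanged. (Note that this requires hoisting `SplitElementTy` out of the `IsDouble` branch so that the int64 path can use it as well.)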
https://github.com/llvm/llvm-project/pull/145047
More information about the llvm-commits mailing list