[llvm] [DirectX] add support for i64 buffer load/stores (PR #145047)

Fri Jun 20 12:18:50 PDT 2025

================
@@ -609,38 +634,82 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
   IRBuilder<> Builder(Orig);
 
   Type *BufferTy = Orig->getFunctionType()->getParamType(2);
-  assert(BufferTy->getScalarType()->isDoubleTy() &&
-         "Only expand double or double2");
-
-  unsigned ExtractNum = 2;
-  if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
-    assert(VT->getNumElements() == 2 &&
-           "TypedBufferStore double vector has wrong size");
-    ExtractNum = 4;
+  Type *ScalarTy = BufferTy->getScalarType();
+  bool IsDouble = ScalarTy->isDoubleTy();
+  assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
+         "Only expand double or int64 scalars or vectors");
+
+  // Determine if we're dealing with a vector or scalar
+  bool IsVector = isa<FixedVectorType>(BufferTy);
+  if (IsVector) {
+    assert(cast<FixedVectorType>(BufferTy)->getNumElements() == 2 &&
+           "TypedBufferStore vector must be size 2");
   }
 
-  Type *SplitElementTy = Builder.getInt32Ty();
-  if (ExtractNum == 4)
-    SplitElementTy = VectorType::get(SplitElementTy, 2, false);
-
-  // split our double(s)
-  auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
-  Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
-                                         Orig->getOperand(2));
-  // create our vector
-  Value *LowBits = Builder.CreateExtractValue(Split, 0);
-  Value *HighBits = Builder.CreateExtractValue(Split, 1);
-  Value *Val;
-  if (ExtractNum == 2) {
-    Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
-    Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
-    Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
-  } else
-    Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+  // Create the appropriate vector type for the result
+  Type *Int32Ty = Builder.getInt32Ty();
+  Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
+  Value *Val = PoisonValue::get(ResultTy);
+
+  // Split the 64-bit values into 32-bit components
+  if (IsDouble) {
+    // Handle double type(s)
+    Type *SplitElementTy = Int32Ty;
+    if (IsVector)
+      SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+
+    auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+    Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+                                           {Orig->getOperand(2)});
+    Value *LowBits = Builder.CreateExtractValue(Split, 0);
+    Value *HighBits = Builder.CreateExtractValue(Split, 1);
+
+    if (IsVector) {
+      // For vector doubles, use shuffle to create the final vector
+      Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+    } else {
+      // For scalar doubles, insert the elements
+      Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+      Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+    }
+  } else {
+    // Handle int64 type(s)
+    Value *InputVal = Orig->getOperand(2);
+
+    if (IsVector) {
+      // Handle vector of int64
+      for (unsigned I = 0; I < 2; ++I) {
+        // Extract each int64 element
+        Value *Int64Val =
+            Builder.CreateExtractElement(InputVal, Builder.getInt32(I));
+
+        // Split into low and high 32-bit parts
+        Value *LowBits = Builder.CreateTrunc(Int64Val, Int32Ty);
+        Value *ShiftedVal = Builder.CreateLShr(Int64Val, Builder.getInt64(32));
+        Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
+
+        // Insert into result vector
+        Val =
+            Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(I * 2));
+        Val = Builder.CreateInsertElement(Val, HighBits,
+                                          Builder.getInt32(I * 2 + 1));
+      }
+    } else {
+      // Handle scalar int64
+      Value *LowBits = Builder.CreateTrunc(InputVal, Int32Ty);
+      Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32));
+      Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
----------------
bogner wrote:

I think you can combine the scalar and vector logic if you avoid scalarizing this by hand. I.e., you could do something like:
```c++
    Constant *ShiftAmt = Builder.getInt64(32);
    if (IsVector)
      ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt);

    // Split into low and high 32-bit parts
    Value *LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
    Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
    Value *HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
```

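Then I think this fits nicely with the logic for doubles above, where we're just changing the conversion operation itself and the surrounding IR is unchanged. (Note that `SplitElementTy` is currently declared inside the `IsDouble` branch, so it would need to be hoisted above the `if` for both paths to share it.)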

https://github.com/llvm/llvm-project/pull/145047