[llvm] [AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942 (PR #145052)

Wed Jul 30 03:50:36 PDT 2025

================
@@ -15896,6 +15936,92 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
   return SDValue(CSrc, 0);
 }
 
+SDValue
+SITargetLowering::performBuildVectorCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  const GCNSubtarget *ST = getSubtarget();
+  if (DCI.Level < AfterLegalizeDAG || !ST->hasMovB64())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned SizeBits = VT.getSizeInBits();
+  unsigned EltSize = EltVT.getSizeInBits();
+
+  // Skip if:
+  //  - Value type isn't multiple of 64 bit (e.g., v3i32), or
+  //  - Element type has already been combined into 64b elements
+  if ((SizeBits % 64) != 0 || EltVT == MVT::i64 || EltVT == MVT::f64)
+    return SDValue();
+
+  // Construct the 64b values.
+  SmallVector<uint64_t, 8> ImmVals;
+  uint64_t ImmVal = 0;
+  uint64_t ImmSize = 0;
+  for (SDValue Opand : N->ops()) {
+    // Build_vector with constants only.
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Opand);
+    ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Opand);
+    BuildVectorSDNode *BV =
+        dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Opand));
+
+    if (!C && !FPC && !BV)
+      return SDValue();
+
+    uint64_t Val = 0;
+    if (BV) {
+      if (!BV->isConstant())
+        return SDValue();
+      bool IsLE = DAG.getDataLayout().isLittleEndian();
+      BitVector UndefElements;
+      SmallVector<APInt> RawBits;
+      if (!BV->getConstantRawBits(IsLE, EltSize, RawBits, UndefElements))
+        return SDValue();
+
+      assert(RawBits.size() == 1 &&
+             "BuildVector constant value retrieval expected 1 element");
+
+      if (UndefElements.any())
+        return SDValue();
+
+      Val = RawBits[0].getZExtValue();
+    } else
+      Val = C ? C->getZExtValue()
+              : FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+    ImmVal |= Val << ImmSize;
+    ImmSize += EltSize;
+    if (ImmSize == 64) {
+      if (!isUInt<32>(ImmVal))
+        return SDValue();
+      ImmVals.push_back(ImmVal);
+      ImmVal = 0;
+      ImmSize = 0;
+    }
+  }
+
+  // Avoid emitting build_vector with 1 element and directly emit value.
+  if (ImmVals.size() == 1) {
+    SDValue Val = DAG.getConstant(ImmVals[0], SL, MVT::i64);
+    return DAG.getBitcast(VT, Val);
+  }
+
+  // Construct and return build_vector with 64b elements.
+  if (!ImmVals.empty()) {
+    SmallVector<SDValue, 8> VectorConsts;
+    for (uint64_t I : ImmVals)
+      VectorConsts.push_back(DAG.getConstant(I, SL, MVT::i64));
----------------
arsenm wrote:

Size the vector to start and avoid push_back in loop 

https://github.com/llvm/llvm-project/pull/145052