[llvm] [NVPTX] Vectorize and lower 256-bit global loads/stores for sm_100+/ptx88+ (PR #139292)

Fri May 9 16:28:15 PDT 2025

================
@@ -1483,44 +1495,27 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
   const unsigned TotalWidth = StoreVT.getSimpleVT().getSizeInBits();
   unsigned ToType = getLdStRegType(StoreVT.getSimpleVT().getScalarType());
 
-  SmallVector<SDValue, 12> Ops;
-  SDValue N2;
-  unsigned VecType;
-  unsigned ToTypeWidth;
-
-  switch (N->getOpcode()) {
-  case NVPTXISD::StoreV2:
-    VecType = NVPTX::PTXLdStInstCode::V2;
-    Ops.append({N->getOperand(1), N->getOperand(2)});
-    N2 = N->getOperand(3);
-    ToTypeWidth = TotalWidth / 2;
-    break;
-  case NVPTXISD::StoreV4:
-    VecType = NVPTX::PTXLdStInstCode::V4;
-    Ops.append({N->getOperand(1), N->getOperand(2), N->getOperand(3),
-                N->getOperand(4)});
-    N2 = N->getOperand(5);
-    ToTypeWidth = TotalWidth / 4;
-    break;
-  default:
-    return false;
-  }
+  unsigned NumElts = getLoadStoreVectorNumElts(N);
+  SmallVector<SDValue, 16> Ops;
+  for (unsigned I : llvm::seq(NumElts))
+    Ops.append({N->getOperand(I + 1)});
----------------
AlexMaclean wrote:

Looks like this should be possible to do with a single call to `Ops.append` by constructing an array ref or something similar into N's operands.

https://github.com/llvm/llvm-project/pull/139292