[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

Tue Aug 26 06:59:31 PDT 2025

================
@@ -5248,49 +5248,94 @@ AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);
-  EVT VT = Op.getValueType();
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
   switch (EltSize) {
   case 1:
-    if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
-      return SDValue();
+    EltVT = MVT::i8;
     break;
   case 2:
-    if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
-      return SDValue();
+    if (NumElements >= 16)
+      NumSplits = NumElements / 16;
+    EltVT = MVT::i16;
     break;
   case 4:
-    if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
-      return SDValue();
+    if (NumElements >= 8)
+      NumSplits = NumElements / 8;
+    EltVT = MVT::i32;
     break;
   case 8:
-    if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
-      return SDValue();
+    if (NumElements >= 4)
+      NumSplits = NumElements / 4;
+    EltVT = MVT::i64;
     break;
   default:
     // Other element sizes are incompatible with whilewr/rw, so expand instead
     return SDValue();
   }
 
-  SDValue PtrA = Op.getOperand(0);
-  SDValue PtrB = Op.getOperand(1);
+  auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+    SDValue PtrA = Op.getOperand(0);
+    SDValue PtrB = Op.getOperand(1);
 
-  if (VT.isScalableVT())
-    return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+    EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+                                   VT.getVectorMinNumElements(), false);
----------------
SamTebbs33 wrote:

I see, I wanted to be safe and use `getStoreSizeInBits()` in case that was ever different from `EltSize * AddrScale * NumElements`.  Done.

https://github.com/llvm/llvm-project/pull/153187