[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

Benjamin Maxwell via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 20 08:22:23 PDT 2025


================
@@ -5248,49 +5248,94 @@ AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);
-  EVT VT = Op.getValueType();
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
   switch (EltSize) {
   case 1:
-    if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
-      return SDValue();
+    EltVT = MVT::i8;
     break;
   case 2:
-    if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
-      return SDValue();
+    if (NumElements >= 16)
+      NumSplits = NumElements / 16;
+    EltVT = MVT::i16;
     break;
   case 4:
-    if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
-      return SDValue();
+    if (NumElements >= 8)
+      NumSplits = NumElements / 8;
+    EltVT = MVT::i32;
     break;
   case 8:
-    if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
-      return SDValue();
+    if (NumElements >= 4)
+      NumSplits = NumElements / 4;
+    EltVT = MVT::i64;
----------------
MacDue wrote:

I found the underlying pattern here somewhat obfuscated; I think this can be simplified quite a bit to:
```cpp
  uint64_t EltSizeInBytes = Op.getConstantOperandVal(2);

  // Other element sizes are incompatible with whilewr/rw, so expand instead
  if (!is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
    return SDValue();

  EVT FullVT = Op.getValueType();
  EVT EltVT = MVT::getIntegerVT(EltSizeInBytes * 8);

  unsigned NumElements = FullVT.getVectorMinNumElements();
  unsigned PredElements = getPackedSVEVectorVT(EltVT).getVectorMinNumElements();
  unsigned NumWhiles = NumElements / PredElements;
```


https://github.com/llvm/llvm-project/pull/153187


More information about the llvm-branch-commits mailing list