[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)
Benjamin Maxwell via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 20 08:22:23 PDT 2025
================
@@ -5248,49 +5248,94 @@ AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
uint64_t EltSize = Op.getConstantOperandVal(2);
- EVT VT = Op.getValueType();
+ EVT FullVT = Op.getValueType();
+ unsigned NumElements = FullVT.getVectorMinNumElements();
+ unsigned NumSplits = 0;
+ EVT EltVT;
switch (EltSize) {
case 1:
- if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
- return SDValue();
+ EltVT = MVT::i8;
break;
case 2:
- if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
- return SDValue();
+ if (NumElements >= 16)
+ NumSplits = NumElements / 16;
+ EltVT = MVT::i16;
break;
case 4:
- if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
- return SDValue();
+ if (NumElements >= 8)
+ NumSplits = NumElements / 8;
+ EltVT = MVT::i32;
break;
case 8:
- if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
- return SDValue();
+ if (NumElements >= 4)
+ NumSplits = NumElements / 4;
+ EltVT = MVT::i64;
----------------
MacDue wrote:
I found the underlying pattern here somewhat obfuscated; I think this can be simplified quite a bit to:
```cpp
uint64_t EltSizeInBytes = Op.getConstantOperandVal(2);
// Other element sizes are incompatible with whilewr/rw, so expand instead
if (!is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
return SDValue();
EVT FullVT = Op.getValueType();
EVT EltVT = MVT::getIntegerVT(EltSizeInBytes * 8);
unsigned NumElements = FullVT.getVectorMinNumElements();
unsigned PredElements = getPackedSVEVectorVT(EltVT).getVectorMinNumElements();
unsigned NumWhiles = NumElements / PredElements;
```
https://github.com/llvm/llvm-project/pull/153187
More information about the llvm-branch-commits
mailing list