[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)
Sander de Smalen via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Aug 21 08:10:37 PDT 2025
================
@@ -5248,49 +5248,94 @@ AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
uint64_t EltSize = Op.getConstantOperandVal(2);
- EVT VT = Op.getValueType();
+ EVT FullVT = Op.getValueType();
+ unsigned NumElements = FullVT.getVectorMinNumElements();
+ unsigned NumSplits = 0;
+ EVT EltVT;
switch (EltSize) {
case 1:
- if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
- return SDValue();
+ EltVT = MVT::i8;
break;
case 2:
- if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
- return SDValue();
+ if (NumElements >= 16)
+ NumSplits = NumElements / 16;
+ EltVT = MVT::i16;
break;
case 4:
- if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
- return SDValue();
+ if (NumElements >= 8)
+ NumSplits = NumElements / 8;
+ EltVT = MVT::i32;
break;
case 8:
- if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
- return SDValue();
+ if (NumElements >= 4)
+ NumSplits = NumElements / 4;
+ EltVT = MVT::i64;
break;
default:
// Other element sizes are incompatible with whilewr/rw, so expand instead
return SDValue();
}
- SDValue PtrA = Op.getOperand(0);
- SDValue PtrB = Op.getOperand(1);
+ auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+ SDValue PtrA = Op.getOperand(0);
+ SDValue PtrB = Op.getOperand(1);
- if (VT.isScalableVT())
- return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+ EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VT.getVectorMinNumElements(), false);
----------------
sdesmalen-arm wrote:
It is not necessary to create `StoreVT`; you can compute the offset directly with `unsigned Offset = VT.getVectorMinNumElements() * EltSize * AddrScale`.
https://github.com/llvm/llvm-project/pull/153187
More information about the llvm-branch-commits mailing list