[llvm] [Intrinsics][AArch64] Add intrinsic to mask off aliasing vector lanes (PR #117007)
Sam Tebbs via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 02:57:04 PDT 2025
================
@@ -5201,6 +5215,99 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
static MVT getSVEContainerType(EVT ContentTy);
+SDValue
+AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ uint64_t EltSize = Op.getConstantOperandVal(2);
+ EVT FullVT = Op.getValueType();
+ unsigned NumElements = FullVT.getVectorMinNumElements();
+ unsigned NumSplits = 0;
+ EVT EltVT;
+ switch (EltSize) {
+ case 1:
+ EltVT = MVT::i8;
+ break;
+ case 2:
+ if (NumElements >= 16)
+ NumSplits = NumElements / 16;
+ EltVT = MVT::i16;
+ break;
+ case 4:
+ if (NumElements >= 8)
+ NumSplits = NumElements / 8;
+ EltVT = MVT::i32;
+ break;
+ case 8:
+ if (NumElements >= 4)
+ NumSplits = NumElements / 4;
+ EltVT = MVT::i64;
+ break;
+ default:
+ // Other element sizes are incompatible with whilewr/rw, so expand instead
+ return SDValue();
+ }
+
+ auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+ SDValue PtrA = Op.getOperand(0);
+ SDValue PtrB = Op.getOperand(1);
+
+ EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VT.getVectorMinNumElements(), false);
+ unsigned Offset = StoreVT.getStoreSizeInBits() / 8 * AddrScale;
+ SDValue Addend;
+
+ if (VT.isScalableVT())
+ Addend = DAG.getVScale(DL, MVT::i64, APInt(64, Offset));
+ else
+ Addend = DAG.getConstant(Offset, DL, MVT::i64);
+
+ PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
+ PtrB = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrB, Addend);
+
+ if (VT.isScalableVT())
+ return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+
+ // We can use the SVE whilewr/whilerw instruction to lower this
+ // intrinsic by creating the appropriate sequence of scalable vector
+ // operations and then extracting a fixed-width subvector from the scalable
+ // vector. Scalable vector variants are already legal.
+ EVT ContainerVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements(), true);
+ EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+
+ SDValue Mask =
+ DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
+ SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
+ DAG.getVectorIdxConstant(0, DL));
+ };
+
+ if (NumSplits == 0)
+ return LowerToWhile(FullVT, false);
----------------
SamTebbs33 wrote:
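Just spotted that the second argument here needs to be 0 rather than false, since the `AddrScale` parameter of `LowerToWhile` is an `unsigned`, not a `bool`.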
https://github.com/llvm/llvm-project/pull/117007
More information about the llvm-commits mailing list