[llvm] [Intrinsics][AArch64] Add intrinsic to mask off aliasing vector lanes (PR #117007)

Mon Aug 11 02:57:04 PDT 2025

================
@@ -5201,6 +5215,99 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
 
 static MVT getSVEContainerType(EVT ContentTy);
 
+SDValue
+AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  uint64_t EltSize = Op.getConstantOperandVal(2);
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
+  switch (EltSize) {
+  case 1:
+    EltVT = MVT::i8;
+    break;
+  case 2:
+    if (NumElements >= 16)
+      NumSplits = NumElements / 16;
+    EltVT = MVT::i16;
+    break;
+  case 4:
+    if (NumElements >= 8)
+      NumSplits = NumElements / 8;
+    EltVT = MVT::i32;
+    break;
+  case 8:
+    if (NumElements >= 4)
+      NumSplits = NumElements / 4;
+    EltVT = MVT::i64;
+    break;
+  default:
+    // Other element sizes are incompatible with whilewr/rw, so expand instead
+    return SDValue();
+  }
+
+  auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+    SDValue PtrA = Op.getOperand(0);
+    SDValue PtrB = Op.getOperand(1);
+
+    EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+                                   VT.getVectorMinNumElements(), false);
+    unsigned Offset = StoreVT.getStoreSizeInBits() / 8 * AddrScale;
+    SDValue Addend;
+
+    if (VT.isScalableVT())
+      Addend = DAG.getVScale(DL, MVT::i64, APInt(64, Offset));
+    else
+      Addend = DAG.getConstant(Offset, DL, MVT::i64);
+
+    PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
+    PtrB = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrB, Addend);
+
+    if (VT.isScalableVT())
+      return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+
+    // We can use the SVE whilewr/whilerw instruction to lower this
+    // intrinsic by creating the appropriate sequence of scalable vector
+    // operations and then extracting a fixed-width subvector from the scalable
+    // vector. Scalable vector variants are already legal.
+    EVT ContainerVT =
+        EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+                         VT.getVectorNumElements(), true);
+    EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+
+    SDValue Mask =
+        DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
+    SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
+                       DAG.getVectorIdxConstant(0, DL));
+  };
+
+  if (NumSplits == 0)
+    return LowerToWhile(FullVT, false);
----------------
SamTebbs33 wrote:

Just spotted that this needs to be 0, rather than false.

https://github.com/llvm/llvm-project/pull/117007