[llvm] [Intrinsics][AArch64] Add intrinsic to mask off aliasing vector lanes (PR #117007)

Wed Jan 29 09:13:44 PST 2025

================
@@ -5263,6 +5273,65 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
 
 static MVT getSVEContainerType(EVT ContentTy);
 
+SDValue AArch64TargetLowering::LowerALIAS_LANE_MASK(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  unsigned IntrinsicID = 0;
+  uint64_t EltSize = Op.getConstantOperandVal(2);
+  bool IsWriteAfterRead = Op.getConstantOperandVal(3) == 1;
+  EVT VT = Op.getValueType();
+  MVT SimpleVT = VT.getSimpleVT();
+  // Make sure that the promoted mask size and element size match
+  switch (EltSize) {
+  case 1:
+    IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_b
+                                   : Intrinsic::aarch64_sve_whilerw_b;
+    assert((SimpleVT == MVT::v16i8 || SimpleVT == MVT::nxv16i1) &&
+           "Unexpected mask or element size");
+    break;
+  case 2:
+    IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_h
+                                   : Intrinsic::aarch64_sve_whilerw_h;
+    assert((SimpleVT == MVT::v8i8 || SimpleVT == MVT::nxv8i1) &&
+           "Unexpected mask or element size");
+    break;
+  case 4:
+    IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_s
+                                   : Intrinsic::aarch64_sve_whilerw_s;
+    assert((SimpleVT == MVT::v4i16 || SimpleVT == MVT::nxv4i1) &&
+           "Unexpected mask or element size");
+    break;
+  case 8:
+    IntrinsicID = IsWriteAfterRead ? Intrinsic::aarch64_sve_whilewr_d
+                                   : Intrinsic::aarch64_sve_whilerw_d;
+    assert((SimpleVT == MVT::v2i32 || SimpleVT == MVT::nxv2i1) &&
+           "Unexpected mask or element size");
+    break;
+  default:
+    llvm_unreachable("Unexpected element size for get.alias.lane.mask");
+    break;
+  }
+  SDValue ID = DAG.getTargetConstant(IntrinsicID, DL, MVT::i64);
+
+  if (VT.isScalableVector())
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Op.getOperand(0),
+                       Op.getOperand(1));
+
+  // We can use the SVE whilewr/whilerw instruction to lower this
+  // intrinsic by creating the appropriate sequence of scalable vector
+  // operations and then extracting a fixed-width subvector from the scalable
+  // vector.
+
+  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+  EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+
+  SDValue Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
----------------
sdesmalen-arm wrote:

What's the reason for the indirection through an intrinsic call, rather than creating an AArch64ISD::WHILERW/WHILEWR node directly?

https://github.com/llvm/llvm-project/pull/117007