[llvm] [AArch64] Add @llvm.experimental.vector.match (PR #101974)

Fri Oct 25 02:43:03 PDT 2024

================
@@ -6379,42 +6379,86 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     assert((Op1VT.getVectorElementType() == MVT::i8 ||
             Op1VT.getVectorElementType() == MVT::i16) &&
            "Expected 8-bit or 16-bit characters.");
-    assert(!Op2VT.isScalableVector() && "Search vector cannot be scalable.");
     assert(Op1VT.getVectorElementType() == Op2VT.getVectorElementType() &&
            "Operand type mismatch.");
-    assert(Op1VT.getVectorMinNumElements() == Op2VT.getVectorNumElements() &&
-           "Invalid operands.");
-
-    // Wrap the search vector in a scalable vector.
-    EVT OpContainerVT = getContainerForFixedLengthVector(DAG, Op2VT);
-    Op2 = convertToScalableVector(DAG, OpContainerVT, Op2);
-
-    // If the result is scalable, we need to broadbast the search vector across
-    // the SVE register and then carry out the MATCH.
-    if (ResVT.isScalableVector()) {
-      Op2 = DAG.getNode(AArch64ISD::DUPLANE128, dl, OpContainerVT, Op2,
-                        DAG.getTargetConstant(0, dl, MVT::i64));
+    assert(!Op2VT.isScalableVector() && "Search vector cannot be scalable.");
+
+    // Note: Currently Op1 needs to be v16i8, v8i16, or the scalable versions.
+    // In the future we could support other types (e.g. v8i8).
+    assert(Op1VT.getSizeInBits().getKnownMinValue() == 128 &&
+           "Unsupported first operand type.");
+
+    // Scalable vector type used to wrap operands.
+    // A single container is enough for both operands because ultimately the
+    // operands will have to be wrapped to the same type (nxv16i8 or nxv8i16).
+    EVT OpContainerVT = Op1VT.isScalableVector()
+                            ? Op1VT
+                            : getContainerForFixedLengthVector(DAG, Op1VT);
+
+    // Wrap Op2 in a scalable register, and splat it if necessary.
+    if (Op1VT.getVectorMinNumElements() == Op2VT.getVectorNumElements()) {
+      // If Op1 and Op2 have the same number of elements we can trivially
+      // wrap Op2 in an SVE register.
+      Op2 = convertToScalableVector(DAG, OpContainerVT, Op2);
+      // If the result is scalable, we need to broadcast Op2 to a full SVE
+      // register.
+      if (ResVT.isScalableVector())
+        Op2 = DAG.getNode(AArch64ISD::DUPLANE128, dl, OpContainerVT, Op2,
+                          DAG.getTargetConstant(0, dl, MVT::i64));
+    } else {
+      // If Op1 and Op2 have a different number of elements, we need to
+      // broadcast Op2. Ideally we would use an AArch64ISD::DUPLANE* node for
+      // this, similarly to the above, but unfortunately it seems we are
+      // missing some patterns for this. So, as an alternative, we splat Op2
+      // through a splat of a scalable vector extract. This idiom, though a bit
+      // more verbose, is supported and gets us the MOV instruction we want.
+
+      // Some types we need. We'll use an integer type with `Op2BitWidth' bits
+      // to wrap Op2 and simulate the DUPLANE.
+      unsigned Op2BitWidth = Op2VT.getFixedSizeInBits();
+      MVT Op2IntVT = MVT::getIntegerVT(Op2BitWidth);
+      MVT Op2FixedVT = MVT::getVectorVT(Op2IntVT, 128 / Op2BitWidth);
+      EVT Op2ScalableVT = getContainerForFixedLengthVector(DAG, Op2FixedVT);
----------------
rj-jesus wrote:

Thank you very much, done!

https://github.com/llvm/llvm-project/pull/101974