[llvm] [AArch64][SVE] Use SVE for scalar FP converts in streaming[-compatible] functions (PR #112564)

Thu Oct 24 06:44:43 PDT 2024

================
@@ -18929,13 +18929,67 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
   return SDValue();
 }
 
+static bool
+shouldUseSVEForScalarFPConversion(SDNode *N,
+                                  const AArch64Subtarget *Subtarget) {
+  auto isSupportedType = [](EVT VT) {
+    if (!VT.isSimple())
+      return false;
+    // There are SVE instructions that can convert to/from all pairs of these
+    // int and float types. Note: We don't bother with i8 or i16 as those are
+    // illegal types for scalars.
+    return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
+                        VT.getSimpleVT().SimpleTy);
+  };
+  // If we are in a streaming[-compatible] function, use SVE for scalar FP <->
+  // INT conversions as this can help avoid movs between GPRs and FPRs, which
+  // could be quite expensive.
+  return !N->isStrictFPOpcode() && Subtarget->isSVEorStreamingSVEAvailable() &&
+         (Subtarget->isStreaming() || Subtarget->isStreamingCompatible()) &&
+         isSupportedType(N->getValueType(0)) &&
+         isSupportedType(N->getOperand(0).getValueType());
+}
+
+/// Replaces a scalar FP <-> INT conversion with an SVE (scalable) one, wrapped
+/// with an insert and extract.
+static SDValue replaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG) {
+  assert(!N->isStrictFPOpcode() && "strict fp ops not supported");
+  SDValue SrcVal = N->getOperand(0);
+  EVT SrcTy = SrcVal.getValueType();
+  EVT DestTy = N->getValueType(0);
+  EVT SrcVecTy;
+  EVT DestVecTy;
+  // Use a packed vector for the larger type.
+  // Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
+  // notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
+  // (unlike floats) nxv2i32 is an illegal unpacked type.
+  if (DestTy.bitsGT(SrcTy)) {
+    DestVecTy = getPackedSVEVectorVT(DestTy);
+    SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
+                                 : DestVecTy.changeVectorElementType(SrcTy);
+  } else {
+    SrcVecTy = getPackedSVEVectorVT(SrcTy);
+    DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
+                                   : SrcVecTy.changeVectorElementType(DestTy);
+  }
----------------
paulwalker-arm wrote:

That is my point, for the cases where the conversion does not fit within an `i32` the result is poison.  This means we don't need to worry about that corner case and so by instead converting to an i64 (for the cases where the i32 base vector type would not be type legal) and truncating the result we can match all the expected results for the defined cases.

https://github.com/llvm/llvm-project/pull/112564