[llvm-branch-commits] [llvm] [LoongArch] Add support for vector FP_ROUND from vxf64 to vxf32 (PR #164059)

Thu Apr 9 05:48:52 PDT 2026

================
@@ -592,7 +596,101 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerVECREDUCE(Op, DAG);
   case ISD::ConstantFP:
     return lowerConstantFP(Op, DAG);
+  case ISD::FP_ROUND:
+    return lowerFP_ROUND(Op, DAG);
+  }
+  return SDValue();
+}
+
+// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
+// LoongArchISD::VFCVT. For example:
+//   x1 = fp_round x, 0
+//   y1 = fp_round y, 0
+//   z = concat_vectors x1, y1
+// Or
+//   x1 = LoongArch::VFCVT undef, x
+//   y1 = LoongArch::VFCVT undef, y
+//   z = LoongArchISD::VPACKEV y1, x1
+// can be combined to:
+//   z = LoongArch::VFCVT y, x
+static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG,
+                               const LoongArchSubtarget &Subtarget) {
+  assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
+          (N->getOpcode() == LoongArchISD::VPACKEV)) &&
+         "Invalid Node");
+
+  SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
+  SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
+  unsigned Opcode0 = Op0.getOpcode();
+  unsigned Opcode1 = Op1.getOpcode();
+  if (Opcode0 != Opcode1)
+    return SDValue();
+
+  if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
+    return SDValue();
+
+  // Check if two nodes have only one use.
+  if (!Op0.hasOneUse() || !Op1.hasOneUse())
+    return SDValue();
+
+  EVT VT = N.getValueType();
+  EVT SVT0 = Op0.getValueType();
+  EVT SVT1 = Op1.getValueType();
+  // Check if two nodes have the same result type.
+  if (SVT0 != SVT1)
+    return SDValue();
+
+  // Check if two nodes have the same operand type.
+  EVT SSVT0 = Op0.getOperand(0).getValueType();
+  EVT SSVT1 = Op1.getOperand(0).getValueType();
+  if (SSVT0 != SSVT1)
+    return SDValue();
+
+  if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
+    if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
+        SSVT0 == MVT::v4f64) {
+      // A vector_shuffle is required in the final step, as xvfcvt instruction
+      // operates on each 128-bit segament as a lane.
+      SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
+                                Op1.getOperand(0), Op0.getOperand(0));
+      SDValue Undef = DAG.getUNDEF(VT);
+      SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
+      Res = DAG.getVectorShuffle(VT, DL, Res, Undef, Mask);
----------------
wangleiat wrote:

Be careful when using `peekThroughBitcasts` before `getVectorShuffle`. If the bitcast changes lane width or element count, the original shuffle mask may become invalid and could trigger an assert. Ensure lane width and count are unchanged.
e.g.
```
define <4 x i64> @fptrunc_concat_bitcast(<8 x double> %x) {
entry:
  ; split to 2 x <4 x double>
  %lo = shufflevector <8 x double> %x, <8 x double> poison,
                    <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %hi = shufflevector <8 x double> %x, <8 x double> poison,
                    <4 x i32> <i32 4, i32 5, i32 6, i32 7>

  %r0 = fptrunc <4 x double> %lo to <4 x float>
  %r1 = fptrunc <4 x double> %hi to <4 x float>

  %i0 = bitcast <4 x float> %r0 to <2 x i64>
  %i1 = bitcast <4 x float> %r1 to <2 x i64>

  ; concat as integer vector
  %cat = shufflevector <2 x i64> %i0, <2 x i64> %i1,
                     <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %cat
}
```

https://github.com/llvm/llvm-project/pull/164059