[llvm] [SDAG] Widen TRUNCATE to intermediate type to avoid ISel failure (PR #172473)

Tue Dec 16 04:51:29 PST 2025

https://github.com/gbossu created https://github.com/llvm/llvm-project/pull/172473

SelectionDAG offered no way to widen TRUNCATE for pathological types like <vscale x 1 x ...> as they do not allow scalarisation.

One way to go further is to widen to an intermediate type, which allows the element type to be promoted in a later run of legalisation.

>From cd09bf8a272b718d270b1b0e27d3a2f8d092cb51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Mon, 15 Dec 2025 16:59:42 +0000
Subject: [PATCH] [SDAG] Widen TRUNCATE to intermediate type to avoid ISel
 failure

SelectionDAG offered no way to widen TRUNCATE for pathological types
like <vscale x 1 x ...> as they do not allow scalarisation.

One way to go further is to widen to an intermediate type, which
allows the element type to be promoted in a later run of legalisation.
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  10 ++
 .../CodeGen/AArch64/saturating-vec-smull.ll   |   2 +-
 llvm/test/CodeGen/AArch64/sve-trunc.ll        | 120 ++++++++++++++++++
 3 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 543072eba6d97..2c7e01af4859f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5663,6 +5663,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
       if (Opcode == ISD::ZERO_EXTEND)
         return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
     }
+
+    // For TRUNCATE, try to widen using the legal EC of the input type instead
+    // if the legalisation action for that intermediate type is integer promotion.
+    EVT MidResVT =
+        EVT::getVectorVT(Ctx, WidenVT.getVectorElementType(), InVTEC);
+    if (N->getOpcode() == ISD::TRUNCATE &&
+        getTypeAction(MidResVT) == TargetLowering::TypePromoteInteger) {
+      SDValue MidRes = DAG.getNode(ISD::TRUNCATE, DL, MidResVT, InOp, Flags);
+      return DAG.getInsertSubvector(DL, DAG.getUNDEF(WidenVT), MidRes, 0);
+    }
   }
 
   if (TLI.isTypeLegal(InWidenVT)) {
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index b647daf72ca35..15d72087fc7b4 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -211,7 +211,7 @@ define <1 x i16> @saturating_1xi16(<1 x i16> %a, <1 x i16> %b) {
 ; CHECK-NEXT:    movi v1.2s, #127, msl #8
 ; CHECK-NEXT:    sshr v0.2s, v0.2s, #15
 ; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %as = sext <1 x i16> %a to <1 x i32>
   %bs = sext <1 x i16> %b to <1 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index be1f844c7f118..543e5bacd513b 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -56,6 +56,126 @@ entry:
   ret <vscale x 2 x i32> %out
 }
 
+define <vscale x 4 x i32> @trunc_i64toi32_legal_abi(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi32_legal_abi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+entry: ; trunc nxv2i64 -> nxv2i32, then insert the result at index 0 of a poison nxv4i32
+  %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i32>
+  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32> poison, <vscale x 2 x i32> %out, i64 0)
+  ret <vscale x 4 x i32> %out.legal
+}
+
+define <vscale x 8 x i16> @trunc_i64toi16_legal_abi(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi16_legal_abi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+entry: ; trunc nxv2i64 -> nxv2i16, then insert the result at index 0 of a poison nxv8i16
+  %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i16>
+  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16> poison, <vscale x 2 x i16> %out, i64 0)
+  ret <vscale x 8 x i16> %out.legal
+}
+
+; Truncating from an "illegal" small type to an even smaller type
+; requires promoting the element type first.
+
+define <vscale x 16 x i8> @trunc_4i16toi8(<vscale x 8 x i16> %in) {
+; CHECK-LABEL: trunc_4i16toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry: ; extract nxv4i16 at index 0, trunc to nxv4i8, insert at index 0 of a poison nxv16i8
+  %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %in, i64 0)
+  %out = trunc <vscale x 4 x i16> %subvec to <vscale x 4 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv4i8(<vscale x 16 x i8> poison, <vscale x 4 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
+; <vscale x 1 x ...> types are tricky because their element type cannot be
+; promoted to form a legal vector type. Instead, they need widening.
+; Note: The uzp1 operations are due to vector.insert().
+
+define <vscale x 4 x i32> @trunc_1i64toi32(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_1i64toi32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+entry: ; extract nxv1i64 at index 0, trunc to nxv1i32, insert at index 0 of a poison nxv4i32
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %in, i64 0)
+  %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i32>
+  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %out, i64 0)
+  ret <vscale x 4 x i32> %out.legal
+}
+
+define <vscale x 8 x i16> @trunc_1i64toi16(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_1i64toi16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+entry: ; extract nxv1i64 at index 0, trunc to nxv1i16, insert at index 0 of a poison nxv8i16
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %in, i64 0)
+  %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i16>
+  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16> poison, <vscale x 1 x i16> %out, i64 0)
+  ret <vscale x 8 x i16> %out.legal
+}
+
+define <vscale x 16 x i8> @trunc_1i64toi8(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_1i64toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry: ; extract nxv1i64 at index 0, trunc to nxv1i8, insert at index 0 of a poison nxv16i8
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %in, i64 0)
+  %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
+define <vscale x 8 x i16> @trunc_1i32toi16(<vscale x 4 x i32> %in) {
+; CHECK-LABEL: trunc_1i32toi16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+entry: ; extract nxv1i32 at index 0, trunc to nxv1i16, insert at index 0 of a poison nxv8i16
+  %subvec = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %in, i64 0)
+  %out = trunc <vscale x 1 x i32> %subvec to <vscale x 1 x i16>
+  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16> poison, <vscale x 1 x i16> %out, i64 0)
+  ret <vscale x 8 x i16> %out.legal
+}
+
+define <vscale x 16 x i8> @trunc_1i32toi8(<vscale x 4 x i32> %in) {
+; CHECK-LABEL: trunc_1i32toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry: ; extract nxv1i32 at index 0, trunc to nxv1i8, insert at index 0 of a poison nxv16i8
+  %subvec = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %in, i64 0)
+  %out = trunc <vscale x 1 x i32> %subvec to <vscale x 1 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
+define <vscale x 16 x i8> @trunc_1i16toi8(<vscale x 8 x i16> %in) {
+; CHECK-LABEL: trunc_1i16toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry: ; extract nxv1i16 at index 0, trunc to nxv1i8, insert at index 0 of a poison nxv16i8
+  %subvec = call <vscale x 1 x i16> @llvm.vector.extract.nxv1i16.nxv8i16(<vscale x 8 x i16> %in, i64 0)
+  %out = trunc <vscale x 1 x i16> %subvec to <vscale x 1 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
 ; Truncating to i1 requires convert it to a cmp
 
 define <vscale x 2 x i1> @trunc_i64toi1(<vscale x 2 x i64> %in) {