[llvm] [SDAG] Widen TRUNCATE to intermediate type to avoid ISel failure (PR #172473)

Thu Dec 18 07:10:57 PST 2025

https://github.com/gbossu updated https://github.com/llvm/llvm-project/pull/172473

>From cd09bf8a272b718d270b1b0e27d3a2f8d092cb51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Mon, 15 Dec 2025 16:59:42 +0000
Subject: [PATCH 1/3] [SDAG] Widen TRUNCATE to intermediate type to avoid ISel
 failure

SelectionDAG offered no way to widen TRUNCATE for pathological types
like <vscale x 1 x ...> as they do not allow scalarisation.

One way to go further is to widen to an intermediate type, which will
allow the element type to be promoted in a later run of legalisation.
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  10 ++
 .../CodeGen/AArch64/saturating-vec-smull.ll   |   2 +-
 llvm/test/CodeGen/AArch64/sve-trunc.ll        | 120 ++++++++++++++++++
 3 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 543072eba6d97..2c7e01af4859f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5663,6 +5663,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
       if (Opcode == ISD::ZERO_EXTEND)
         return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
     }
+
+    // For TRUNCATE, try to widen using the legal EC of the input type instead
+    // if the legalisation action for that intermediate type is not widening.
+    EVT MidResVT =
+        EVT::getVectorVT(Ctx, WidenVT.getVectorElementType(), InVTEC);
+    if (N->getOpcode() == ISD::TRUNCATE &&
+        getTypeAction(MidResVT) == TargetLowering::TypePromoteInteger) {
+      SDValue MidRes = DAG.getNode(ISD::TRUNCATE, DL, MidResVT, InOp, Flags);
+      return DAG.getInsertSubvector(DL, DAG.getUNDEF(WidenVT), MidRes, 0);
+    }
   }
 
   if (TLI.isTypeLegal(InWidenVT)) {
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index b647daf72ca35..15d72087fc7b4 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -211,7 +211,7 @@ define <1 x i16> @saturating_1xi16(<1 x i16> %a, <1 x i16> %b) {
 ; CHECK-NEXT:    movi v1.2s, #127, msl #8
 ; CHECK-NEXT:    sshr v0.2s, v0.2s, #15
 ; CHECK-NEXT:    smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %as = sext <1 x i16> %a to <1 x i32>
   %bs = sext <1 x i16> %b to <1 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index be1f844c7f118..543e5bacd513b 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -56,6 +56,126 @@ entry:
   ret <vscale x 2 x i32> %out
 }
 
+define <vscale x 4 x i32> @trunc_i64toi32_legal_abi(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi32_legal_abi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+entry:
+  %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i32>
+  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32> poison, <vscale x 2 x i32> %out, i64 0)
+  ret <vscale x 4 x i32> %out.legal
+}
+
+define <vscale x 8 x i16> @trunc_i64toi16_legal_abi(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_i64toi16_legal_abi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+entry:
+  %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i16>
+  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16> poison, <vscale x 2 x i16> %out, i64 0)
+  ret <vscale x 8 x i16> %out.legal
+}
+
+; Truncating from an "illegal" small type to an even smaller type
+; requires promoting the element type first.
+
+define <vscale x 16 x i8> @trunc_4i16toi8(<vscale x 8 x i16> %in) {
+; CHECK-LABEL: trunc_4i16toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nvv4i16.nxv8i16(<vscale x 8 x i16> %in, i64 0)
+  %out = trunc <vscale x 4 x i16> %subvec to <vscale x 4 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv4i8(<vscale x 16 x i8> poison, <vscale x 4 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
+; <vscale x 1 x ...> types are tricky because their element type cannot be
+; promoted to form a legal vector type. Instead, they need widening.
+; Note: The uzp1 operations are due to vector.insert().
+
+define <vscale x 4 x i32> @trunc_1i64toi32(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_1i64toi32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 2 x i64> %in, i64 0)
+  %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i32>
+  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv8i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %out, i64 0)
+  ret <vscale x 4 x i32> %out.legal
+}
+
+define <vscale x 8 x i16> @trunc_1i64toi16(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_1i64toi16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 2 x i64> %in, i64 0)
+  %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i16>
+  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16> poison, <vscale x 1 x i16> %out, i64 0)
+  ret <vscale x 8 x i16> %out.legal
+}
+
+define <vscale x 16 x i8> @trunc_1i64toi8(<vscale x 2 x i64> %in) {
+; CHECK-LABEL: trunc_1i64toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 2 x i64> %in, i64 0)
+  %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv8i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
+define <vscale x 8 x i16> @trunc_1i32toi16(<vscale x 4 x i32> %in) {
+; CHECK-LABEL: trunc_1i32toi16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %in, i64 0)
+  %out = trunc <vscale x 1 x i32> %subvec to <vscale x 1 x i16>
+  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16> poison, <vscale x 1 x i16> %out, i64 0)
+  ret <vscale x 8 x i16> %out.legal
+}
+
+define <vscale x 16 x i8> @trunc_1i32toi8(<vscale x 4 x i32> %in) {
+; CHECK-LABEL: trunc_1i32toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %in, i64 0)
+  %out = trunc <vscale x 1 x i32> %subvec to <vscale x 1 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv8i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
+define <vscale x 16 x i8> @trunc_1i16toi8(<vscale x 8 x i16> %in) {
+; CHECK-LABEL: trunc_1i16toi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+entry:
+  %subvec = call <vscale x 1 x i16> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 8 x i16> %in, i64 0)
+  %out = trunc <vscale x 1 x i16> %subvec to <vscale x 1 x i8>
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv8i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  ret <vscale x 16 x i8> %out.legal
+}
+
 ; Truncating to i1 requires convert it to a cmp
 
 define <vscale x 2 x i1> @trunc_i64toi1(<vscale x 2 x i64> %in) {

>From ecc2f0e31782d24b4a58f756a163dbcd4996f066 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Thu, 18 Dec 2025 08:44:11 +0000
Subject: [PATCH 2/3] Comments:

* get rid of tests using well-defined ABI and not testing new code
* replace getUNDEF with getPOISON
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  2 +-
 llvm/test/CodeGen/AArch64/sve-trunc.ll        | 40 -------------------
 2 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 2c7e01af4859f..b404aadcf0f0c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5671,7 +5671,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
     if (N->getOpcode() == ISD::TRUNCATE &&
         getTypeAction(MidResVT) == TargetLowering::TypePromoteInteger) {
       SDValue MidRes = DAG.getNode(ISD::TRUNCATE, DL, MidResVT, InOp, Flags);
-      return DAG.getInsertSubvector(DL, DAG.getUNDEF(WidenVT), MidRes, 0);
+      return DAG.getInsertSubvector(DL, DAG.getPOISON(WidenVT), MidRes, 0);
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index 543e5bacd513b..ec5086a091516 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -56,46 +56,6 @@ entry:
   ret <vscale x 2 x i32> %out
 }
 
-define <vscale x 4 x i32> @trunc_i64toi32_legal_abi(<vscale x 2 x i64> %in) {
-; CHECK-LABEL: trunc_i64toi32_legal_abi:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    ret
-entry:
-  %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i32>
-  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32> poison, <vscale x 2 x i32> %out, i64 0)
-  ret <vscale x 4 x i32> %out.legal
-}
-
-define <vscale x 8 x i16> @trunc_i64toi16_legal_abi(<vscale x 2 x i64> %in) {
-; CHECK-LABEL: trunc_i64toi16_legal_abi:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    ret
-entry:
-  %out = trunc <vscale x 2 x i64> %in to <vscale x 2 x i16>
-  %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16> poison, <vscale x 2 x i16> %out, i64 0)
-  ret <vscale x 8 x i16> %out.legal
-}
-
-; Truncating from an "illegal" small type to an even smaller type
-; requires promoting the element type first.
-
-define <vscale x 16 x i8> @trunc_4i16toi8(<vscale x 8 x i16> %in) {
-; CHECK-LABEL: trunc_4i16toi8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    ret
-entry:
-  %subvec = call <vscale x 4 x i16> @llvm.vector.extract.nvv4i16.nxv8i16(<vscale x 8 x i16> %in, i64 0)
-  %out = trunc <vscale x 4 x i16> %subvec to <vscale x 4 x i8>
-  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv4i8(<vscale x 16 x i8> poison, <vscale x 4 x i8> %out, i64 0)
-  ret <vscale x 16 x i8> %out.legal
-}
-
 ; <vscale x 1 x ...> types are tricky because their element type cannot be
 ; promoted to form a legal vector type. Instead, they need widening.
 ; Note: The uzp1 operations are due to vector.insert().

>From b722bdeaab80c033a481e89ccddc806fa33f4449 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Thu, 18 Dec 2025 15:08:55 +0000
Subject: [PATCH 3/3] Comments

 - Fix invalid types in intrinsic names
 - Add example for widening
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  5 +++++
 llvm/test/CodeGen/AArch64/sve-trunc.ll        | 19 ++++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index b404aadcf0f0c..b5c6582449d45 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5666,6 +5666,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
 
     // For TRUNCATE, try to widen using the legal EC of the input type instead
     // if the legalisation action for that intermediate type is not widening.
+    // E.g. for trunc nxv1i64 -> nxv1i8 where
+    //  - nxv1i64 input gets widened to nxv2i64
+    //  - nxv1i8 output gets widened to nxv16i8
+    // Then one can try widening the result to nxv2i8 (instead of going all the
+    // way to nxv16i8) if this later allows type promotion.
     EVT MidResVT =
         EVT::getVectorVT(Ctx, WidenVT.getVectorElementType(), InVTEC);
     if (N->getOpcode() == ISD::TRUNCATE &&
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index ec5086a091516..42500b53222a2 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -58,7 +58,8 @@ entry:
 
 ; <vscale x 1 x ...> types are tricky because their element type cannot be
 ; promoted to form a legal vector type. Instead, they need widening.
-; Note: The uzp1 operations are due to vector.insert().
+; Note: The uzp1 operations are due to vector.insert(), which is required
+; to avoid relying on an undefined ABI.
 
 define <vscale x 4 x i32> @trunc_1i64toi32(<vscale x 2 x i64> %in) {
 ; CHECK-LABEL: trunc_1i64toi32:
@@ -66,9 +67,9 @@ define <vscale x 4 x i32> @trunc_1i64toi32(<vscale x 2 x i64> %in) {
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    ret
 entry:
-  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 2 x i64> %in, i64 0)
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %in, i64 0)
   %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i32>
-  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv8i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %out, i64 0)
+  %out.legal = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> poison, <vscale x 1 x i32> %out, i64 0)
   ret <vscale x 4 x i32> %out.legal
 }
 
@@ -79,7 +80,7 @@ define <vscale x 8 x i16> @trunc_1i64toi16(<vscale x 2 x i64> %in) {
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    ret
 entry:
-  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 2 x i64> %in, i64 0)
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %in, i64 0)
   %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i16>
   %out.legal = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv1i16(<vscale x 8 x i16> poison, <vscale x 1 x i16> %out, i64 0)
   ret <vscale x 8 x i16> %out.legal
@@ -93,9 +94,9 @@ define <vscale x 16 x i8> @trunc_1i64toi8(<vscale x 2 x i64> %in) {
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    ret
 entry:
-  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 2 x i64> %in, i64 0)
+  %subvec = call <vscale x 1 x i64> @llvm.vector.extract.nxv1i64.nxv2i64(<vscale x 2 x i64> %in, i64 0)
   %out = trunc <vscale x 1 x i64> %subvec to <vscale x 1 x i8>
-  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv8i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
   ret <vscale x 16 x i8> %out.legal
 }
 
@@ -120,7 +121,7 @@ define <vscale x 16 x i8> @trunc_1i32toi8(<vscale x 4 x i32> %in) {
 entry:
   %subvec = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32> %in, i64 0)
   %out = trunc <vscale x 1 x i32> %subvec to <vscale x 1 x i8>
-  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv8i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
   ret <vscale x 16 x i8> %out.legal
 }
 
@@ -130,9 +131,9 @@ define <vscale x 16 x i8> @trunc_1i16toi8(<vscale x 8 x i16> %in) {
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    ret
 entry:
-  %subvec = call <vscale x 1 x i16> @llvm.vector.extract.nxv1i32.nxv4i32(<vscale x 8 x i16> %in, i64 0)
+  %subvec = call <vscale x 1 x i16> @llvm.vector.extract.nxv1i16.nxv8i16(<vscale x 8 x i16> %in, i64 0)
   %out = trunc <vscale x 1 x i16> %subvec to <vscale x 1 x i8>
-  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv8i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
+  %out.legal = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.nxv1i8(<vscale x 16 x i8> poison, <vscale x 1 x i8> %out, i64 0)
   ret <vscale x 16 x i8> %out.legal
 }