[llvm] [DAGCombiner] Don't optimize insert_vector_elt into shuffle if implicit truncation exists (PR #169022)

Fri Nov 21 02:42:18 PST 2025

https://github.com/XChy created https://github.com/llvm/llvm-project/pull/169022

Fixes #169017

>From b91a568baf8df8fb8846423aa428a0a940a549b1 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Fri, 21 Nov 2025 18:39:07 +0800
Subject: [PATCH] [DAGCombiner] Don't optimize insertelt into shuffle if
 implicit truncation exists

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  4 +++
 .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 26 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f144f17d5a8f2..d41e0508b6907 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23391,6 +23391,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
   EVT SubVecVT = SubVec.getValueType();
   EVT VT = DestVec.getValueType();
   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+  // Bail out if the inserted value's type differs from the vector element
+  // type: insert_vector_elt would implicitly truncate a wider value here.
+  if (InsertVal.getValueType() != VT.getVectorElementType())
+    return SDValue();
   // If the source only has a single vector element, the cost of creating adding
   // it to a vector is likely to exceed the cost of a insert_vector_elt.
   if (NumSrcElts == 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 3a5b3719931a9..79286c0304e0c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -1143,6 +1143,32 @@ define <4 x half> @insertelt_v4f16_idx(<4 x half> %a, half %y, i32 zeroext %idx)
   %b = insertelement <4 x half> %a, half %y, i32 %idx
   ret <4 x half> %b
 }
+
+define <2 x i8> @pr169017(<4 x i16> %vecinit, <2 x i8> %dst_vec) {
+; CHECK-LABEL: pr169017:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+;
+; VISNI-LABEL: pr169017:
+; VISNI:       # %bb.0: # %entry
+; VISNI-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT:    vmv.x.s a0, v8
+; VISNI-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
+; VISNI-NEXT:    vmv.s.x v9, a0
+; VISNI-NEXT:    vmv1r.v v8, v9
+; VISNI-NEXT:    ret
+entry:
+  %cast = bitcast <4 x i16> %vecinit to i64
+  %trunc = trunc i64 %cast to i8
+  %2 = insertelement <2 x i8> %dst_vec, i8 %trunc, i64 0
+  ret <2 x i8> %2
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; ZVFHMINRV32: {{.*}}
 ; ZVFHMINRV64: {{.*}}