[llvm] [X86] Fixed truncated masked stores (PR #179853)

Thu Feb 5 00:52:43 PST 2026

https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/179853

From 7471996e693e11a9784b29df0c7d116cb13378b5 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 5 Feb 2026 11:24:12 +0800
Subject: [PATCH 1/2] [X86] Fixed truncated masked stores

Fixes: #179489
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  8 ++++++++
 llvm/test/CodeGen/X86/pr179489.ll       | 21 +++++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr179489.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e1aad028c14c0..914c3298dfc32 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54090,6 +54090,14 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
   SDValue Extract =
       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex);
 
+  if (MS->isTruncatingStore()) {
+    if (EltVT.isFloatingPoint())
+      return SDValue();
+
+    Extract = DAG.getNode(ISD::TRUNCATE, DL,
+                          MS->getMemoryVT().getVectorElementType(), Extract);
+  }
+
   // Store that element at the appropriate offset from the base pointer.
   return DAG.getStore(MS->getChain(), DL, Extract, Addr,
                       MS->getPointerInfo().getWithOffset(Offset),
diff --git a/llvm/test/CodeGen/X86/pr179489.ll b/llvm/test/CodeGen/X86/pr179489.ll
new file mode 100644
index 0000000000000..346aafe5b5769
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr179489.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.2 | FileCheck %s --check-prefix=CHECK
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
+declare void @llvm.masked.store.v8i8.p1(<8 x i8>, ptr addrspace(1) captures(none), <8 x i1>) #0
+
+define void @foo(<8 x i16> %arg, ptr addrspace(1) %add.ptr, i64 %dim0) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovw %xmm0, %eax
+; CHECK-NEXT:    movb %ah, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %i3 = shufflevector <8 x i16> %arg, <8 x i16> <i16 poison, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %i4 = lshr <8 x i16> %i3, splat (i16 8)
+  %i5 = trunc <8 x i16> %i4 to <8 x i8>
+  call void @llvm.masked.store.v8i8.p1(<8 x i8> %i5, ptr addrspace(1) %add.ptr, <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
+  ret void
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }

From 7161983d536048929145dcc3bf9c612de975cd42 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 5 Feb 2026 16:52:16 +0800
Subject: [PATCH 2/2] Update comment

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 914c3298dfc32..87b1f355c0361 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54060,8 +54060,8 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// If exactly one element of the mask is set for a non-truncating masked store,
-/// it is a vector extract and scalar store.
+/// If exactly one element of the mask is set for a masked store, it is a vector
+/// extract, truncate (iff truncating store) and scalar store.
 /// Note: It is expected that the degenerate cases of an all-zeros or all-ones
 /// mask have already been optimized in IR, so we don't bother with those here.
 static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,