[llvm] 7da282f - [X86] combineArithReduction - avoid PSADBW expansion for 128-bit integers and larger (#173979)
via llvm-commits (llvm-commits at lists.llvm.org)
Tue Dec 30 04:28:29 PST 2025
Author: Simon Pilgrim
Date: 2025-12-30T12:28:24Z
New Revision: 7da282fd3820126183e2bfcaa9ea5c45096397ce
URL: https://github.com/llvm/llvm-project/commit/7da282fd3820126183e2bfcaa9ea5c45096397ce
DIFF: https://github.com/llvm/llvm-project/commit/7da282fd3820126183e2bfcaa9ea5c45096397ce.diff
LOG: [X86] combineArithReduction - avoid PSADBW expansion for 128-bit integers and larger (#173979)
Fixes #173924
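The PSADBW path sums zero-extended v8i8 subvectors into vXi64 lanes, so it is only valid when the reduced element type fits in 64 bits; the added EltSizeInBits <= 64 guard makes combineArithReduction bail out for wider element types such as the i256 reduction in the reproducer below. As a minimal sketch (assuming the same shape as the committed pr173924.ll test, but with hypothetical i128 elements, which the new guard also excludes):

; Hypothetical i128 variant of the pr173924.ll pattern; the EltSizeInBits <= 64
; guard makes combineArithReduction skip the PSADBW lowering for this as well.
define i128 @reduce_v8i128(<8 x i128> %a0) {
  %m = and <8 x i128> %a0, splat (i128 1)
  %r = call i128 @llvm.vector.reduce.add.v8i128(<8 x i128> %m)
  ret i128 %r
}

For i16/i32/i64 elements that pass the known-bits check, the PSADBW-based reduction is still emitted as before.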
Added:
llvm/test/CodeGen/X86/pr173924.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aefe861a62a54..a12582afa7c30 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47262,7 +47262,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
// sum+zext v8i8 subvectors to vXi64, then perform the reduction.
// TODO: See if its worth avoiding vXi16/i32 truncations?
if (Opc == ISD::ADD && NumElts >= 4 && EltSizeInBits >= 16 &&
- DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
+ EltSizeInBits <= 64 && DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
(EltSizeInBits == 16 || Rdx.getOpcode() == ISD::ZERO_EXTEND ||
Subtarget.hasAVX512())) {
if (Rdx.getValueType() == MVT::v8i16) {
diff --git a/llvm/test/CodeGen/X86/pr173924.ll b/llvm/test/CodeGen/X86/pr173924.ll
new file mode 100644
index 0000000000000..f5059da10da7c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr173924.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+define i256 @PR173924(<8 x i256> %a0) {
+; CHECK-LABEL: PR173924:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT: andl $1, %r10d
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: addl %r10d, %esi
+; CHECK-NEXT: andl $1, %r8d
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: addl %r8d, %ecx
+; CHECK-NEXT: addl %esi, %ecx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vmovq %xmm0, %rsi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: andl $1, %r9d
+; CHECK-NEXT: addl %edi, %r9d
+; CHECK-NEXT: addl %edx, %esi
+; CHECK-NEXT: addl %r9d, %esi
+; CHECK-NEXT: addl %ecx, %esi
+; CHECK-NEXT: vmovd %esi, %xmm0
+; CHECK-NEXT: vmovdqu %ymm0, (%rax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %m = and <8 x i256> %a0, splat (i256 1)
+ %r = call i256 @llvm.vector.reduce.add.v8i256(<8 x i256> %m)
+ ret i256 %r
+}