[llvm] [X86] Ensure we fold pow2 masks with the mask type, not the result type (PR #173984)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 30 04:34:54 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/173984
This was missed in #173366, when we relaxed the constraint on the types so that they only need to have the same element width.
Fixes #173794
From dfbf435b519df5c33a428c403dab6f12a4ccb055 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 30 Dec 2025 12:34:00 +0000
Subject: [PATCH] [X86] Ensure we fold pow2 masks with the mask type, not the
result type
This was missed in #173366, when we relaxed the constraint on the types so that they only need to have the same element width.
Fixes #173794
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +--
llvm/test/CodeGen/X86/pr173794.ll | 43 +++++++++++++++++++++++++
2 files changed, 46 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/pr173794.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aefe861a62a54..db17c4b6356e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48471,8 +48471,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
MaskVal->getAPIntValue().exactLogBase2());
}
// vsel ((X & C) == 0), LHS, RHS --> vsel ((shl X, C') < 0), RHS, LHS
- SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL);
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt);
+ MVT MskVT = Mask.getSimpleValueType();
+ SDValue ShlAmt = getConstVector(ShlVals, MskVT, DAG, DL);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, MskVT, And.getOperand(0), ShlAmt);
SDValue NewCond =
DAG.getSetCC(DL, CondVT, Shl, Cond.getOperand(1), ISD::SETLT);
return DAG.getSelect(DL, VT, NewCond, RHS, LHS);
diff --git a/llvm/test/CodeGen/X86/pr173794.ll b/llvm/test/CodeGen/X86/pr173794.ll
new file mode 100644
index 0000000000000..b38bc8c7ad581
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr173794.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefix=AVX512
+
+define <2 x double> @PR173794(<2 x i64> %a0) {
+; SSE2-LABEL: PR173794:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: PR173794:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE42-NEXT: pxor %xmm1, %xmm1
+; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE42-NEXT: retq
+;
+; AVX2-LABEL: PR173794:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR173794:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = [1.0E+0,1.0E+0]
+; AVX512-NEXT: # xmm0 {%k1} {z} = mem[0,0]
+; AVX512-NEXT: retq
+ %m = and <2 x i64> %a0, <i64 1, i64 2>
+ %c = icmp eq <2 x i64> %m, zeroinitializer
+ %r = select <2 x i1> %c, <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
+ ret <2 x double> %r
+}
More information about the llvm-commits
mailing list