[llvm] [X86] InstCombine: Generalize scalar SSE MAX/MIN intrinsics (PR #175375)
Guilherme oliveira de campos via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 08:57:42 PST 2026
https://github.com/guiolidc updated https://github.com/llvm/llvm-project/pull/175375
>From 34fa778699cb8320f1acf4543f9452ca8645be37 Mon Sep 17 00:00:00 2001
From: Guilherme Oliveira de Campos <oliveira.gui at hotmail.com.br>
Date: Sat, 10 Jan 2026 14:47:46 -0300
Subject: [PATCH] [X86] InstCombine: Generalize scalar SSE MAX/MIN intrinsics
This handles x86_sse_max_ss/min_ss and the related scalar intrinsics (x86_sse2_max_sd/min_sd). It checks whether it is known to be safe to convert them to llvm.maxnum/llvm.minnum.
---
.../Target/X86/X86InstCombineIntrinsic.cpp | 49 ++++++++++++++----
.../InstCombine/X86/x86-scalar-max-min.ll | 51 +++++++++++++++++++
2 files changed, 91 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 5eff38b214aef..76d543a9ab23f 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -1734,24 +1734,41 @@ static Value *simplifyTernarylogic(const IntrinsicInst &II,
return Res.first;
}
-static Value *simplifyX86FPMaxMin(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder,
- Intrinsic::ID NewIID) {
+static Value *simplifyX86FPMaxMin(const IntrinsicInst &II, InstCombiner &IC,
+ Intrinsic::ID NewIID, bool IsScalar = false) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
+ unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
+
+ SimplifyQuery SQ = IC.getSimplifyQuery().getWithInstruction(&II);
+ APInt DemandedElts =
+ IsScalar ? APInt::getOneBitSet(VWidth, 0) : APInt::getAllOnes(VWidth);
// Verify that the inputs are not one of (NaN, Inf, Subnormal, NegZero),
// otherwise we cannot safely generalize to MAXNUM/MINNUM.
FPClassTest Forbidden = fcNan | fcInf | fcSubnormal | fcNegZero;
KnownFPClass KnownArg0 =
- computeKnownFPClass(Arg0, Forbidden, II.getDataLayout(), 0);
+ computeKnownFPClass(Arg0, DemandedElts, Forbidden, SQ);
KnownFPClass KnownArg1 =
- computeKnownFPClass(Arg1, Forbidden, II.getDataLayout(), 0);
+ computeKnownFPClass(Arg1, DemandedElts, Forbidden, SQ);
if (KnownArg0.isKnownNever(Forbidden) && KnownArg1.isKnownNever(Forbidden)) {
- return (NewIID == Intrinsic::maxnum) ? Builder.CreateMaxNum(Arg0, Arg1)
- : Builder.CreateMinNum(Arg0, Arg1);
+ if (IsScalar) {
+ // It performs the operation on the first element and puts it back into
+ // the vector.
+ Value *Scalar0 = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
+ Value *Scalar1 = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
+
+ Value *NewScalar = (NewIID == Intrinsic::maxnum)
+ ? IC.Builder.CreateMaxNum(Scalar0, Scalar1)
+ : IC.Builder.CreateMinNum(Scalar0, Scalar1);
+ return IC.Builder.CreateInsertElement(Arg0, NewScalar, (uint64_t)0);
+ } else {
+ return (NewIID == Intrinsic::maxnum)
+ ? IC.Builder.CreateMaxNum(Arg0, Arg1)
+ : IC.Builder.CreateMinNum(Arg0, Arg1);
+ }
}
return nullptr;
@@ -3138,6 +3155,20 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, V);
}
break;
+
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_max_sd: {
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::maxnum, true))
+ return IC.replaceInstUsesWith(II, V);
+ break;
+ }
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse2_min_sd: {
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::minnum, true))
+ return IC.replaceInstUsesWith(II, V);
+ break;
+ }
+
default:
break;
}
@@ -3351,7 +3382,7 @@ std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
case Intrinsic::x86_avx512fp16_max_ph_128:
case Intrinsic::x86_avx512fp16_max_ph_256:
case Intrinsic::x86_avx512fp16_max_ph_512:
- if (Value *V = simplifyX86FPMaxMin(II, IC.Builder, Intrinsic::maxnum))
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::maxnum))
return IC.replaceInstUsesWith(II, V);
break;
case Intrinsic::x86_sse_min_ps:
@@ -3363,7 +3394,7 @@ std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
case Intrinsic::x86_avx512fp16_min_ph_128:
case Intrinsic::x86_avx512fp16_min_ph_256:
case Intrinsic::x86_avx512fp16_min_ph_512:
- if (Value *V = simplifyX86FPMaxMin(II, IC.Builder, Intrinsic::minnum))
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::minnum))
return IC.replaceInstUsesWith(II, V);
break;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll b/llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll
new file mode 100644
index 0000000000000..9b46a84316433
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+define <4 x float> @test_max_ss_nan(<4 x float> %a) {
+ ; We pass a vector where the bottom element is NaN
+; CHECK-LABEL: define <4 x float> @test_max_ss_nan(
+; CHECK-SAME: <4 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A]], <4 x float> <float 0x7FF8000000000000, float poison, float poison, float poison>)
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> <float 0x7FF8000000000000, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_min_ss_inf(<4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @test_min_ss_inf(
+; CHECK-SAME: <4 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A]], <4 x float> <float 0x7FF0000000000000, float poison, float poison, float poison>)
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> <float 0x7FF0000000000000, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss_neg_zero(<4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @test_max_ss_neg_zero(
+; CHECK-SAME: <4 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A]], <4 x float> <float -0.000000e+00, float poison, float poison, float poison>)
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> <float -0.0, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss_variable_unsafe(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: define <4 x float> @test_max_ss_variable_unsafe(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A]], <4 x float> [[B]])
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss_safe_constants() {
+; CHECK-LABEL: define <4 x float> @test_max_ss_safe_constants() {
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> <float 1.0, float 0.0, float 0.0, float 0.0>, <4 x float> <float 2.0, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
More information about the llvm-commits
mailing list