[llvm] [X86] InstCombine: Generalize scalar SSE MAX/MIN intrinsics (PR #175375)
Guilherme oliveira de campos via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 12 05:19:26 PST 2026
https://github.com/guiolidc updated https://github.com/llvm/llvm-project/pull/175375
>From d711c1112ff3a175cef2849bc581e5e83929a61e Mon Sep 17 00:00:00 2001
From: Guilherme Oliveira de Campos <oliveira.gui at hotmail.com.br>
Date: Sat, 10 Jan 2026 14:47:46 -0300
Subject: [PATCH] [X86] InstCombine: Generalize scalar SSE MAX/MIN intrinsics
This handles x86_sse_max_ss/min_ss and related intrinsics. It checks whether it is known to be safe to convert them to llvm.maxnum/minnum.
---
.../Target/X86/X86InstCombineIntrinsic.cpp | 49 +++++++++---
.../InstCombine/X86/x86-scalar-max-min.ll | 74 +++++++++++++++++++
2 files changed, 114 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 7345dc794de7b..5321960ed852d 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -1734,24 +1734,41 @@ static Value *simplifyTernarylogic(const IntrinsicInst &II,
return Res.first;
}
-static Value *simplifyX86FPMaxMin(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder,
- Intrinsic::ID NewIID) {
+static Value *simplifyX86FPMaxMin(const IntrinsicInst &II, InstCombiner &IC,
+ Intrinsic::ID NewIID, bool IsScalar = false) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
+ unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
+
+ SimplifyQuery SQ = IC.getSimplifyQuery().getWithInstruction(&II);
+ APInt DemandedElts =
+ IsScalar ? APInt::getOneBitSet(VWidth, 0) : APInt::getAllOnes(VWidth);
// Verify that the inputs are not one of (NaN, Inf, Subnormal, NegZero),
// otherwise we cannot safely generalize to MAXNUM/MINNUM.
FPClassTest Forbidden = fcNan | fcInf | fcSubnormal | fcNegZero;
KnownFPClass KnownArg0 =
- computeKnownFPClass(Arg0, Forbidden, II.getDataLayout(), 0);
+ computeKnownFPClass(Arg0, DemandedElts, Forbidden, SQ);
KnownFPClass KnownArg1 =
- computeKnownFPClass(Arg1, Forbidden, II.getDataLayout(), 0);
+ computeKnownFPClass(Arg1, DemandedElts, Forbidden, SQ);
if (KnownArg0.isKnownNever(Forbidden) && KnownArg1.isKnownNever(Forbidden)) {
- return (NewIID == Intrinsic::maxnum) ? Builder.CreateMaxNum(Arg0, Arg1)
- : Builder.CreateMinNum(Arg0, Arg1);
+ if (IsScalar) {
+ // It performs the operation on the first element and puts it back into
+ // the vector.
+ Value *Scalar0 = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
+ Value *Scalar1 = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
+
+ Value *NewScalar = (NewIID == Intrinsic::maxnum)
+ ? IC.Builder.CreateMaxNum(Scalar0, Scalar1)
+ : IC.Builder.CreateMinNum(Scalar0, Scalar1);
+ return IC.Builder.CreateInsertElement(Arg0, NewScalar, (uint64_t)0);
+ } else {
+ return (NewIID == Intrinsic::maxnum)
+ ? IC.Builder.CreateMaxNum(Arg0, Arg1)
+ : IC.Builder.CreateMinNum(Arg0, Arg1);
+ }
}
return nullptr;
@@ -2534,7 +2551,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::x86_avx512fp16_max_ph_128:
case Intrinsic::x86_avx512fp16_max_ph_256:
case Intrinsic::x86_avx512fp16_max_ph_512:
- if (Value *V = simplifyX86FPMaxMin(II, IC.Builder, Intrinsic::maxnum))
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::maxnum))
return IC.replaceInstUsesWith(II, V);
break;
@@ -2547,7 +2564,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::x86_avx512fp16_min_ph_128:
case Intrinsic::x86_avx512fp16_min_ph_256:
case Intrinsic::x86_avx512fp16_min_ph_512:
- if (Value *V = simplifyX86FPMaxMin(II, IC.Builder, Intrinsic::minnum))
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::minnum))
return IC.replaceInstUsesWith(II, V);
break;
@@ -3165,6 +3182,20 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, V);
}
break;
+
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_max_sd: {
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::maxnum, true))
+ return IC.replaceInstUsesWith(II, V);
+ break;
+ }
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse2_min_sd: {
+ if (Value *V = simplifyX86FPMaxMin(II, IC, Intrinsic::minnum, true))
+ return IC.replaceInstUsesWith(II, V);
+ break;
+ }
+
default:
break;
}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll b/llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll
new file mode 100644
index 0000000000000..0119a8ed3dd9b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-scalar-max-min.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+define <4 x float> @test_max_ss_nan(<4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @test_max_ss_nan(
+; CHECK-SAME: <4 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A]], <4 x float> <float 0x7FF8000000000000, float poison, float poison, float poison>)
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> <float 0x7FF8000000000000, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_min_ss_inf(<4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @test_min_ss_inf(
+; CHECK-SAME: <4 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A]], <4 x float> <float 0x7FF0000000000000, float poison, float poison, float poison>)
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> <float 0x7FF0000000000000, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_max_sd_neg_zero(<2 x double> %a) {
+; CHECK-LABEL: define <2 x double> @test_max_sd_neg_zero(
+; CHECK-SAME: <2 x double> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A]], <2 x double> <double -0.000000e+00, double poison>)
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> <double -0.0, double 0.0>)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_min_sd_neg_zero(<2 x double> %a) {
+; CHECK-LABEL: define <2 x double> @test_min_sd_neg_zero(
+; CHECK-SAME: <2 x double> [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A]], <2 x double> <double -0.000000e+00, double poison>)
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> <double -0.0, double 0.0>)
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_max_ss_variable_unsafe(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: define <4 x float> @test_max_ss_variable_unsafe(
+; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A]], <4 x float> [[B]])
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss_safe_constants() {
+; CHECK-LABEL: define <4 x float> @test_max_ss_safe_constants() {
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+;
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> <float 1.0, float 0.0, float 0.0, float 0.0>, <4 x float> <float 2.0, float 0.0, float 0.0, float 0.0>)
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_min_sd_safe_expansion(<2 x i32> %a) {
+; CHECK-LABEL: define <2 x double> @test_min_sd_safe_expansion(
+; CHECK-SAME: <2 x i32> [[A:%.*]]) {
+; CHECK-NEXT: [[SAFE_INPUT:%.*]] = sitofp <2 x i32> [[A]] to <2 x double>
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[SAFE_INPUT]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[TMP1]], double 5.000000e+00)
+; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x double> [[SAFE_INPUT]], double [[TMP2]], i64 0
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %safe_input = sitofp <2 x i32> %a to <2 x double>
+ %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %safe_input, <2 x double> <double 5.0, double 0.0>)
+ ret <2 x double> %res
+}
More information about the llvm-commits
mailing list