[llvm] [X86][InstCombine] Generalize SSE/AVX fp MAX/MIN intrinsics to maxnum/minnum (PR #174806)

Fri Jan 9 03:09:48 PST 2026

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/174806

>From e8c96777dbccb506d46f048893d6d4b908081ffe Mon Sep 17 00:00:00 2001
From: neil <hello at neil.place>
Date: Wed, 7 Jan 2026 13:42:28 -0500
Subject: [PATCH 1/3] [x86] Generalize SSE/AVX fp MAX/MIN intrinsics to
 maxnum/minnum

---
 .../Target/X86/X86InstCombineIntrinsic.cpp    |  49 +++++
 .../Transforms/InstCombine/X86/x86-maxmin.ll  | 187 ++++++++++++++++++
 2 files changed, 236 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index cbfaf0f60333f..047847fd9cbbc 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -1734,6 +1734,29 @@ static Value *simplifyTernarylogic(const IntrinsicInst &II,
   return Res.first;
 }
 
+static Value *simplifyX86FPMaxMin(const IntrinsicInst &II,
+                                InstCombiner::BuilderTy &Builder,
+                                Intrinsic::ID NewIID) {
+
+  Value *Arg0 = II.getArgOperand(0);
+  Value *Arg1 = II.getArgOperand(1);
+
+  // Only fold to generic maxnum/minnum when both operands are known never to
+  // be NaN, Inf, subnormal or -0.0; otherwise give up and return nullptr.
+  FPClassTest Forbidden = fcNan | fcInf | fcSubnormal | fcNegZero;
+  KnownFPClass KnownArg0 =
+      computeKnownFPClass(Arg0, Forbidden, II.getDataLayout(), 0);
+  KnownFPClass KnownArg1 =
+      computeKnownFPClass(Arg1, Forbidden, II.getDataLayout(), 0);
+
+  if (KnownArg0.isKnownNever(Forbidden) && KnownArg1.isKnownNever(Forbidden)) {
+    return (NewIID == Intrinsic::maxnum) ? Builder.CreateMaxNum(Arg0, Arg1)
+                                         : Builder.CreateMinNum(Arg0, Arg1);
+  }
+
+  return nullptr;
+}
+
 static Value *simplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@@ -3318,6 +3341,32 @@ std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
     break;
   }
 
+  // Generalize SSE/AVX/AVX512 FP max/min intrinsics to maxnum/minnum.
+  case Intrinsic::x86_sse_max_ps:
+  case Intrinsic::x86_sse2_max_pd:
+  case Intrinsic::x86_avx_max_pd_256:
+  case Intrinsic::x86_avx_max_ps_256:
+  case Intrinsic::x86_avx512_max_pd_512:
+  case Intrinsic::x86_avx512_max_ps_512:
+  case Intrinsic::x86_avx512fp16_max_ph_128:
+  case Intrinsic::x86_avx512fp16_max_ph_256:
+  case Intrinsic::x86_avx512fp16_max_ph_512:
+    if (Value *V = simplifyX86FPMaxMin(II, IC.Builder, Intrinsic::maxnum))
+      return IC.replaceInstUsesWith(II, V);
+    break;
+  case Intrinsic::x86_sse_min_ps:
+  case Intrinsic::x86_sse2_min_pd:
+  case Intrinsic::x86_avx_min_pd_256:
+  case Intrinsic::x86_avx_min_ps_256:
+  case Intrinsic::x86_avx512_min_pd_512:
+  case Intrinsic::x86_avx512_min_ps_512:
+  case Intrinsic::x86_avx512fp16_min_ph_128:
+  case Intrinsic::x86_avx512fp16_min_ph_256:
+  case Intrinsic::x86_avx512fp16_min_ph_512:
+    if (Value *V = simplifyX86FPMaxMin(II, IC.Builder, Intrinsic::minnum))
+      return IC.replaceInstUsesWith(II, V);
+    break;
+
   // General per-element vector operations.
   case Intrinsic::x86_avx2_psllv_d:
   case Intrinsic::x86_avx2_psllv_d_256:
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll b/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll
new file mode 100644
index 0000000000000..3aa8a6d0f9a4f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>)
+declare <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half>, <32 x half>, i32)
+declare <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half>, <32 x half>, i32)
+
+define <4 x float> @test_sse_max_ps(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: define <4 x float> @test_sse_max_ps(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <4 x i32> [[X]] to <4 x float>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i32> [[Y]], splat (i32 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> splat (float 1.000000e+00), <4 x float> splat (float -1.000000e+00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVT]], <4 x float> [[SEL]])
+; CHECK-NEXT:    ret <4 x float> [[MAX]]
+;
+entry:
+  %cvt = sitofp <4 x i32> %x to <4 x float>
+  %cmp = icmp sgt <4 x i32> %y, splat (i32 -1)
+  %sel = select <4 x i1> %cmp, <4 x float> splat (float 1.000000e+00), <4 x float> splat (float -1.000000e+00)
+  %max = tail call noundef <4 x float> @llvm.x86.sse.max.ps(<4 x float> %cvt, <4 x float> %sel)
+  ret <4 x float> %max
+}
+
+define <4 x float> @test_sse_min_ps(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: define <4 x float> @test_sse_min_ps(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <4 x i32> [[X]] to <4 x float>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i32> [[Y]], splat (i32 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> splat (float 1.000000e+00), <4 x float> splat (float -1.000000e+00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVT]], <4 x float> [[SEL]])
+; CHECK-NEXT:    ret <4 x float> [[MAX]]
+;
+entry:
+  %cvt = sitofp <4 x i32> %x to <4 x float>
+  %cmp = icmp sgt <4 x i32> %y, splat (i32 -1)
+  %sel = select <4 x i1> %cmp, <4 x float> splat (float 1.000000e+00), <4 x float> splat (float -1.000000e+00)
+  %max = tail call noundef <4 x float> @llvm.x86.sse.min.ps(<4 x float> %cvt, <4 x float> %sel)
+  ret <4 x float> %max
+}
+
+define <2 x double> @test_sse2_max_pd(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: define <2 x double> @test_sse2_max_pd(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <2 x i64> [[X]] to <2 x double>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i64> [[Y]], splat (i64 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x double> splat (double 1.000000e+00), <2 x double> splat (double -1.000000e+00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[CVT]], <2 x double> [[SEL]])
+; CHECK-NEXT:    ret <2 x double> [[MAX]]
+;
+entry:
+  %cvt = sitofp <2 x i64> %x to <2 x double>
+  %cmp = icmp sgt <2 x i64> %y, splat (i64 -1)
+  %sel = select <2 x i1> %cmp, <2 x double> splat (double 1.000000e+00), <2 x double> splat (double -1.000000e+00)
+  %max = tail call noundef <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %cvt, <2 x double> %sel)
+  ret <2 x double> %max
+}
+
+define <2 x double> @test_sse2_min_pd(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: define <2 x double> @test_sse2_min_pd(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <2 x i64> [[X]] to <2 x double>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i64> [[Y]], splat (i64 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x double> splat (double 1.000000e+00), <2 x double> splat (double -1.000000e+00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[CVT]], <2 x double> [[SEL]])
+; CHECK-NEXT:    ret <2 x double> [[MAX]]
+;
+entry:
+  %cvt = sitofp <2 x i64> %x to <2 x double>
+  %cmp = icmp sgt <2 x i64> %y, splat (i64 -1)
+  %sel = select <2 x i1> %cmp, <2 x double> splat (double 1.000000e+00), <2 x double> splat (double -1.000000e+00)
+  %max = tail call noundef <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %cvt, <2 x double> %sel)
+  ret <2 x double> %max
+}
+
+define <8 x float> @test_avx_max_ps(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: define <8 x float> @test_avx_max_ps(
+; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <8 x i32> [[X]] to <8 x float>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <8 x i32> [[Y]], splat (i32 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x float> splat (float 1.000000e+00), <8 x float> splat (float -1.000000e+00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[CVT]], <8 x float> [[SEL]])
+; CHECK-NEXT:    ret <8 x float> [[MAX]]
+;
+entry:
+  %cvt = sitofp <8 x i32> %x to <8 x float>
+  %cmp = icmp sgt <8 x i32> %y, splat (i32 -1)
+  %sel = select <8 x i1> %cmp, <8 x float> splat (float 1.000000e+00), <8 x float> splat (float -1.000000e+00)
+  %max = tail call noundef <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %cvt, <8 x float> %sel)
+  ret <8 x float> %max
+}
+
+
+define <8 x float> @test_avx_min_ps(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: define <8 x float> @test_avx_min_ps(
+; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <8 x i32> [[X]] to <8 x float>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <8 x i32> [[Y]], splat (i32 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x float> splat (float 1.000000e+00), <8 x float> splat (float -1.000000e+00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[CVT]], <8 x float> [[SEL]])
+; CHECK-NEXT:    ret <8 x float> [[MAX]]
+;
+entry:
+  %cvt = sitofp <8 x i32> %x to <8 x float>
+  %cmp = icmp sgt <8 x i32> %y, splat (i32 -1)
+  %sel = select <8 x i1> %cmp, <8 x float> splat (float 1.000000e+00), <8 x float> splat (float -1.000000e+00)
+  %max = tail call noundef <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %cvt, <8 x float> %sel)
+  ret <8 x float> %max
+}
+
+define <32 x half> @test_avx512fp16_max_ph(<32 x i16> %x, <32 x i16> %y) {
+; CHECK-LABEL: define <32 x half> @test_avx512fp16_max_ph(
+; CHECK-SAME: <32 x i16> [[X:%.*]], <32 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <32 x i16> [[X]] to <32 x half>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <32 x i16> [[Y]], splat (i16 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x half> splat (half 0xH3C00), <32 x half> splat (half 0xHBC00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <32 x half> @llvm.maxnum.v32f16(<32 x half> [[CVT]], <32 x half> [[SEL]])
+; CHECK-NEXT:    ret <32 x half> [[MAX]]
+;
+entry:
+  %cvt = sitofp <32 x i16> %x to <32 x half>
+  %cmp = icmp sgt <32 x i16> %y, splat (i16 -1)
+  %sel = select <32 x i1> %cmp, <32 x half> splat (half 1.000000e+00), <32 x half> splat (half -1.000000e+00)
+  %max = tail call noundef <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %cvt, <32 x half> %sel, i32 4)
+  ret <32 x half> %max
+}
+
+define <32 x half> @test_avx512fp16_min_ph(<32 x i16> %x, <32 x i16> %y) {
+; CHECK-LABEL: define <32 x half> @test_avx512fp16_min_ph(
+; CHECK-SAME: <32 x i16> [[X:%.*]], <32 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <32 x i16> [[X]] to <32 x half>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <32 x i16> [[Y]], splat (i16 -1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x half> splat (half 0xH3C00), <32 x half> splat (half 0xHBC00)
+; CHECK-NEXT:    [[MAX:%.*]] = call <32 x half> @llvm.minnum.v32f16(<32 x half> [[CVT]], <32 x half> [[SEL]])
+; CHECK-NEXT:    ret <32 x half> [[MAX]]
+;
+entry:
+  %cvt = sitofp <32 x i16> %x to <32 x half>
+  %cmp = icmp sgt <32 x i16> %y, splat (i16 -1)
+  %sel = select <32 x i1> %cmp, <32 x half> splat (half 1.000000e+00), <32 x half> splat (half -1.000000e+00)
+  %max = tail call noundef <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %cvt, <32 x half> %sel, i32 4)
+  ret <32 x half> %max
+}
+
+
+; Negative Test Cases (Operands Could be NaN)
+
+define <4 x float> @test_sse_max_ps_unknown(<4 x i32> %x, <4 x float> %y) {
+; CHECK-LABEL: define <4 x float> @test_sse_max_ps_nan(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CVT:%.*]] = sitofp <4 x i32> [[X]] to <4 x float>
+; CHECK-NEXT:    [[MAX:%.*]] = tail call noundef <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[CVT]], <4 x float> [[Y]])
+; CHECK-NEXT:    ret <4 x float> [[MAX]]
+;
+entry:
+  %cvt = sitofp <4 x i32> %x to <4 x float>
+  %max = tail call noundef <4 x float> @llvm.x86.sse.max.ps(<4 x float> %cvt, <4 x float> %y)
+  ret <4 x float> %max
+}
+
+
+define <4 x float> @test_sse_max_ps_both_unknown(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: define <4 x float> @test_sse_max_ps_both_nan(
+; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MAX:%.*]] = tail call noundef <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[X]], <4 x float> [[Y]])
+; CHECK-NEXT:    ret <4 x float> [[MAX]]
+;
+entry:
+  %max = tail call noundef <4 x float> @llvm.x86.sse.max.ps(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %max
+}
+

>From 5fe669d7828c5c8f84ba4aaa47437ef334aa2c0d Mon Sep 17 00:00:00 2001
From: neil <hello at neil.place>
Date: Thu, 8 Jan 2026 10:18:42 -0500
Subject: [PATCH 2/3] clang-format

---
 llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 047847fd9cbbc..5eff38b214aef 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -1735,8 +1735,8 @@ static Value *simplifyTernarylogic(const IntrinsicInst &II,
 }
 
 static Value *simplifyX86FPMaxMin(const IntrinsicInst &II,
-                                InstCombiner::BuilderTy &Builder,
-                                Intrinsic::ID NewIID) {
+                                  InstCombiner::BuilderTy &Builder,
+                                  Intrinsic::ID NewIID) {
 
   Value *Arg0 = II.getArgOperand(0);
   Value *Arg1 = II.getArgOperand(1);

>From 22e3d2e66bd100e91a5dc1959d3560a81abb9382 Mon Sep 17 00:00:00 2001
From: neil <hello at neil.place>
Date: Thu, 8 Jan 2026 11:27:06 -0500
Subject: [PATCH 3/3] Fix test case names

---
 llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll b/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll
index 3aa8a6d0f9a4f..dd87c9e8c1836 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-maxmin.ll
@@ -159,7 +159,7 @@ entry:
 ; Negative Test Cases (Operands Could be NaN)
 
 define <4 x float> @test_sse_max_ps_unknown(<4 x i32> %x, <4 x float> %y) {
-; CHECK-LABEL: define <4 x float> @test_sse_max_ps_nan(
+; CHECK-LABEL: define <4 x float> @test_sse_max_ps_unknown(
 ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x float> [[Y:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[CVT:%.*]] = sitofp <4 x i32> [[X]] to <4 x float>
@@ -174,7 +174,7 @@ entry:
 
 
 define <4 x float> @test_sse_max_ps_both_unknown(<4 x float> %x, <4 x float> %y) {
-; CHECK-LABEL: define <4 x float> @test_sse_max_ps_both_nan(
+; CHECK-LABEL: define <4 x float> @test_sse_max_ps_both_unknown(
 ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[MAX:%.*]] = tail call noundef <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[X]], <4 x float> [[Y]])