[llvm] [InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm (PR #143020)
Konstantin Bogdanov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 03:49:44 PDT 2025
https://github.com/thevar1able updated https://github.com/llvm/llvm-project/pull/143020
From 084efd92871d5ac2f7dace668cb43986954defc4 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 20:47:32 +0200
Subject: [PATCH 01/13] Prototype fix
---
.../InstCombine/InstCombineCalls.cpp | 19 +++++++++++++++++++
llvm/test/Transforms/InstCombine/select.ll | 15 +++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..930819d24393a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1654,6 +1654,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
+ if (Function *F = CI.getCalledFunction()) {
+ if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) {
+ for (Value *Arg : CI.args()) {
+ auto *SI = dyn_cast<SelectInst>(Arg);
+ if (!SI)
+ continue;
+
+ auto *TrueC = dyn_cast<Constant>(SI->getTrueValue());
+ auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
+
+ // Block only if the select is masking, e.g. select(cond, val, -1)
+ if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) {
+ LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n");
+ return nullptr;
+ }
+ }
+ }
+ }
+
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index e16f6ad2cfc9b..09cb84cde07ca 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5047,3 +5047,18 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
%sel = select i1 %cond, <2 x ptr> %y, <2 x ptr> %freeze
ret <2 x ptr> %sel
}
+
+declare i8 @llvm.umin.i8(i8, i8)
+
+define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; CHECK-LABEL: @no_fold_masked_min(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
+; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %cond = icmp eq i8 %mask, 0
+ %masked_val = select i1 %cond, i8 %val, i8 -1
+ %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+ ret i8 %res
+}
From 1b3e66224eef8f475e4f6cf60865b2b10901a33d Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 21:03:53 +0200
Subject: [PATCH 02/13] Fix formatting
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 930819d24393a..0e5c95c7445dd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1655,7 +1655,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return visitFree(CI, FreedOp);
if (Function *F = CI.getCalledFunction()) {
- if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) {
+ if (F->getIntrinsicID() == Intrinsic::umin ||
+ F->getIntrinsicID() == Intrinsic::umax) {
for (Value *Arg : CI.args()) {
auto *SI = dyn_cast<SelectInst>(Arg);
if (!SI)
@@ -1665,8 +1666,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
// Block only if the select is masking, e.g. select(cond, val, -1)
- if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) {
- LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n");
+ if ((TrueC && TrueC->isAllOnesValue()) ||
+ (FalseC && FalseC->isAllOnesValue())) {
+ LLVM_DEBUG(
+ dbgs()
+ << "InstCombine: skipping umin/umax folding for masked select\n");
return nullptr;
}
}
From d87b193e4832ad2984df5a72d1e2aeda614662bc Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 23:04:05 +0200
Subject: [PATCH 03/13] Move checks to FoldOpIntoSelect
---
.../InstCombine/InstCombineCalls.cpp | 23 -------------------
.../InstCombine/InstructionCombining.cpp | 19 +++++++++++++++
2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0e5c95c7445dd..cfb4af391b540 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1654,29 +1654,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
- if (Function *F = CI.getCalledFunction()) {
- if (F->getIntrinsicID() == Intrinsic::umin ||
- F->getIntrinsicID() == Intrinsic::umax) {
- for (Value *Arg : CI.args()) {
- auto *SI = dyn_cast<SelectInst>(Arg);
- if (!SI)
- continue;
-
- auto *TrueC = dyn_cast<Constant>(SI->getTrueValue());
- auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
-
- // Block only if the select is masking, e.g. select(cond, val, -1)
- if ((TrueC && TrueC->isAllOnesValue()) ||
- (FalseC && FalseC->isAllOnesValue())) {
- LLVM_DEBUG(
- dbgs()
- << "InstCombine: skipping umin/umax folding for masked select\n");
- return nullptr;
- }
- }
- }
- }
-
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 439a86d951a83..29211b4ac0ad1 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1719,6 +1719,25 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
+ if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
+ return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+ }
+
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
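
For context, the check added here short-circuits FoldOpIntoSelect, which would otherwise fold the min/max intrinsic into both arms of the select and then simplify the constant arm away. A rough IR sketch of the rewrite being suppressed (illustrative function and value names, modeled on the no_fold_masked_min test from the first patch; umin(%acc, -1) folds to %acc):

  declare i8 @llvm.umin.i8(i8, i8)

  ; before the fold: the select masks %val with -1, the identity for umin
  define i8 @masked_min_before(i8 %acc, i8 %val, i1 %cond) {
    %masked = select i1 %cond, i8 %val, i8 -1
    %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked)
    ret i8 %res
  }

  ; after the fold: umin is applied in each arm, and umin(%acc, -1) becomes %acc
  define i8 @masked_min_after(i8 %acc, i8 %val, i1 %cond) {
    %min = call i8 @llvm.umin.i8(i8 %acc, i8 %val)
    %res = select i1 %cond, i8 %min, i8 %acc
    ret i8 %res
  }
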
From 69eaf81f28214760c080296c7aed23250668a6b9 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 23:13:30 +0200
Subject: [PATCH 04/13] Fix formatting
---
.../InstCombine/InstructionCombining.cpp | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 29211b4ac0ad1..53efd629ea19d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1721,20 +1721,20 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
+ return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
}
}
From 8240c6c8eedb3a499e4f3f0c457fd847e86da10e Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Sat, 7 Jun 2025 08:07:36 +0200
Subject: [PATCH 05/13] Apply suggested fix
---
.../InstCombine/InstructionCombining.cpp | 24 ++++------------
llvm/test/Transforms/InstCombine/select.ll | 28 +++++++++----------
2 files changed, 20 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 53efd629ea19d..23a3c87a640e1 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1719,24 +1719,12 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
- if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
- }
- }
+ if (isa<MinMaxIntrinsic>(&Op))
+ for (Value *IntrinOp : Op.operands())
+ if (auto *PN = dyn_cast<PHINode>(IntrinOp))
+ for (Value *PhiOp : PN->operands())
+ if (PhiOp == &Op)
+ return nullptr;
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 09cb84cde07ca..13464187e6e81 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5048,17 +5048,17 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
ret <2 x ptr> %sel
}
-declare i8 @llvm.umin.i8(i8, i8)
-
-define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
-; CHECK-LABEL: @no_fold_masked_min(
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
-; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
-; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
-; CHECK-NEXT: ret i8 [[RES]]
-;
- %cond = icmp eq i8 %mask, 0
- %masked_val = select i1 %cond, i8 %val, i8 -1
- %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
- ret i8 %res
-}
+; declare i8 @llvm.umin.i8(i8, i8)
+;
+; define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; ; CHECK-LABEL: @no_fold_masked_min(
+; ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; ; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
+; ; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
+; ; CHECK-NEXT: ret i8 [[RES]]
+; ;
+; %cond = icmp eq i8 %mask, 0
+; %masked_val = select i1 %cond, i8 %val, i8 -1
+; %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+; ret i8 %res
+; }
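
The suggested fix replaces the constant matching with a structural check: roughly, bail out when the min/max intrinsic participates in a PHI cycle, i.e. one of its operands is a PHI node whose incoming values include the intrinsic itself, which is the shape of a loop reduction. A minimal IR sketch of such a cycle (illustrative names; the full masked-loop test is added in a later patch):

  declare i8 @llvm.umin.i8(i8, i8)

  define i8 @umin_reduction_sketch(ptr %p, i64 %n) {
  entry:
    br label %loop

  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    ; %acc takes %res on the back edge ...
    %acc = phi i8 [ -1, %entry ], [ %res, %loop ]
    %q = getelementptr inbounds i8, ptr %p, i64 %i
    %v = load i8, ptr %q, align 1
    ; ... and %res reads %acc, closing the reduction cycle the new check looks for
    %res = call i8 @llvm.umin.i8(i8 %acc, i8 %v)
    %i.next = add i64 %i, 1
    %done = icmp eq i64 %i.next, %n
    br i1 %done, label %exit, label %loop

  exit:
    ret i8 %res
  }
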
From 8a7663b66339bfd3cf59bbbeb488b1dffd0fb533 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 22:29:22 +0200
Subject: [PATCH 06/13] Update expected output
---
.../PhaseOrdering/X86/vector-reductions.ll | 48 +++++++++++++++----
1 file changed, 38 insertions(+), 10 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index f8450766037b2..8ee3345a963a9 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -332,20 +332,48 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-LABEL: @masked_min_reduction(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: loop:
+; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DATA]], i64 96
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[DATA]], align 1
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]])
-; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
+; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
+; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
+; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP16]], <32 x i8> [[TMP17]])
+; CHECK-NEXT: [[RDX_MINMAX11:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX]], <32 x i8> [[TMP18]])
+; CHECK-NEXT: [[RDX_MINMAX12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX11]], <32 x i8> [[TMP19]])
+; CHECK-NEXT: [[TMP21:%.*]] = tail call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> [[RDX_MINMAX12]])
; CHECK-NEXT: ret i8 [[TMP21]]
;
entry:
From 7929ed17610d2ff709392b9f3124f00ad274bede Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 22:58:08 +0200
Subject: [PATCH 07/13] Add a comment
---
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 23a3c87a640e1..291c738e94b85 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1719,6 +1719,8 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
+ // Avoid breaking min/max reduction pattern,
+ // which is necessary for vectorization later.
if (isa<MinMaxIntrinsic>(&Op))
for (Value *IntrinOp : Op.operands())
if (auto *PN = dyn_cast<PHINode>(IntrinOp))
From 22ef2528d9299e9b02ef5b5535f6c8900bdf377d Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:00:38 +0200
Subject: [PATCH 08/13] Add a test
---
llvm/test/Transforms/InstCombine/select.ll | 76 +++++++++++++++----
.../PhaseOrdering/X86/vector-reductions.ll | 2 -
2 files changed, 62 insertions(+), 16 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 13464187e6e81..937a5f0360a1d 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5048,17 +5048,65 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
ret <2 x ptr> %sel
}
-; declare i8 @llvm.umin.i8(i8, i8)
-;
-; define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
-; ; CHECK-LABEL: @no_fold_masked_min(
-; ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
-; ; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
-; ; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
-; ; CHECK-NEXT: ret i8 [[RES]]
-; ;
-; %cond = icmp eq i8 %mask, 0
-; %masked_val = select i1 %cond, i8 %val, i8 -1
-; %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
-; ret i8 %res
-; }
+declare i8 @llvm.umin.i8(i8, i8)
+
+define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; CHECK-LABEL: @no_fold_masked_min(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL:%.*]], i8 -1
+; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %cond = icmp eq i8 %mask, 0
+ %masked_val = select i1 %cond, i8 %val, i8 -1
+ %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+ ret i8 %res
+}
+
+define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) {
+; CHECK-LABEL: @no_fold_masked_min_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[RES:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[VALS:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[MASK_PTR:%.*]] = getelementptr inbounds i8, ptr [[MASKS:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[VAL_PTR]], align 1
+; CHECK-NEXT: [[MASK:%.*]] = load i8, ptr [[MASK_PTR]], align 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK]], 0
+; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL]], i8 -1
+; CHECK-NEXT: [[RES]] = call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[MASKED_VAL]])
+; CHECK-NEXT: [[NEXT_INDEX]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXT_INDEX]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [0, %entry], [%next_index, %loop]
+ %acc = phi i8 [255, %entry], [%res, %loop]
+
+ %val_ptr = getelementptr inbounds i8, ptr %vals, i64 %index
+ %mask_ptr = getelementptr inbounds i8, ptr %masks, i64 %index
+
+ %val = load i8, ptr %val_ptr, align 1
+ %mask = load i8, ptr %mask_ptr, align 1
+
+ %cond = icmp eq i8 %mask, 0
+ %masked_val = select i1 %cond, i8 %val, i8 -1
+ %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+
+ %next_index = add i64 %index, 1
+ %done = icmp eq i64 %next_index, %n
+ br i1 %done, label %exit, label %loop
+
+exit:
+ store i8 %res, ptr %out, align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 8ee3345a963a9..5e679e692fd8d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -326,8 +326,6 @@ cleanup:
ret i1 %retval.0
}
-; From https://github.com/llvm/llvm-project/issues/139050.
-; FIXME: This should be vectorized.
define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-LABEL: @masked_min_reduction(
; CHECK-NEXT: entry:
From d424540814671d5cd83feed87fddcf467860119f Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:30:21 +0200
Subject: [PATCH 09/13] Update tests assertions
---
llvm/test/Transforms/InstCombine/select.ll | 1 -
.../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++++--------
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 937a5f0360a1d..67143bfe8f65c 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5084,7 +5084,6 @@ define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture
; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
;
-
entry:
br label %loop
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 5e679e692fd8d..2ec48a8637dae 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -332,10 +332,10 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64
@@ -356,10 +356,10 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1)
; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
From 304cf20e9def8081c7234f7d3552e4d42e8504e1 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:44:41 +0200
Subject: [PATCH 10/13] Revert suggested fix
---
.../InstCombine/InstructionCombining.cpp | 22 ++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 291c738e94b85..fa79b12b47bff 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1721,12 +1721,24 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
// Avoid breaking min/max reduction pattern,
// which is necessary for vectorization later.
- if (isa<MinMaxIntrinsic>(&Op))
- for (Value *IntrinOp : Op.operands())
- if (auto *PN = dyn_cast<PHINode>(IntrinOp))
- for (Value *PhiOp : PN->operands())
- if (PhiOp == &Op)
+ if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+ }
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
From 1165581c55aaf2a089e486665d95bc30c28ce0c2 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:46:02 +0200
Subject: [PATCH 11/13] Update tests assertions
---
.../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 2ec48a8637dae..45632e878021e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -356,14 +356,14 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
-; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
-; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
-; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]])
+; CHECK-NEXT: [[TMP12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP16]] = select <32 x i1> [[TMP8]], <32 x i8> [[TMP12]], <32 x i8> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[WIDE_LOAD4]])
+; CHECK-NEXT: [[TMP17]] = select <32 x i1> [[TMP9]], <32 x i8> [[TMP14]], <32 x i8> [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP23:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[WIDE_LOAD5]])
+; CHECK-NEXT: [[TMP18]] = select <32 x i1> [[TMP10]], <32 x i8> [[TMP23]], <32 x i8> [[VEC_PHI2]]
+; CHECK-NEXT: [[TMP24:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[WIDE_LOAD6]])
+; CHECK-NEXT: [[TMP19]] = select <32 x i1> [[TMP11]], <32 x i8> [[TMP24]], <32 x i8> [[VEC_PHI3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
From 0d48fc43c52bd59ec26bbe717433dc4e9bc6ec21 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 12 Jun 2025 00:08:49 +0200
Subject: [PATCH 12/13] Reformat
---
.../InstCombine/InstructionCombining.cpp | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index fa79b12b47bff..b51328f111b94 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1723,20 +1723,20 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
// which is necessary for vectorization later.
if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
+ return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
}
}
From 22183ade298ed277eb555aebd0f64af398e94e9e Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Fri, 13 Jun 2025 12:49:18 +0200
Subject: [PATCH 13/13] Reapply suggested fix
---
.../InstCombine/InstructionCombining.cpp | 24 +++++--------------
llvm/test/Transforms/InstCombine/select.ll | 15 ------------
.../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++-------
3 files changed, 14 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index b51328f111b94..291c738e94b85 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1721,24 +1721,12 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
// Avoid breaking min/max reduction pattern,
// which is necessary for vectorization later.
- if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
- }
- }
+ if (isa<MinMaxIntrinsic>(&Op))
+ for (Value *IntrinOp : Op.operands())
+ if (auto *PN = dyn_cast<PHINode>(IntrinOp))
+ for (Value *PhiOp : PN->operands())
+ if (PhiOp == &Op)
+ return nullptr;
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 67143bfe8f65c..ef5874ffd46ad 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5048,21 +5048,6 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
ret <2 x ptr> %sel
}
-declare i8 @llvm.umin.i8(i8, i8)
-
-define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
-; CHECK-LABEL: @no_fold_masked_min(
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
-; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL:%.*]], i8 -1
-; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL]])
-; CHECK-NEXT: ret i8 [[RES]]
-;
- %cond = icmp eq i8 %mask, 0
- %masked_val = select i1 %cond, i8 %val, i8 -1
- %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
- ret i8 %res
-}
-
define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) {
; CHECK-LABEL: @no_fold_masked_min_loop(
; CHECK-NEXT: entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 45632e878021e..2ec48a8637dae 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -356,14 +356,14 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[WIDE_LOAD]])
-; CHECK-NEXT: [[TMP16]] = select <32 x i1> [[TMP8]], <32 x i8> [[TMP12]], <32 x i8> [[VEC_PHI]]
-; CHECK-NEXT: [[TMP14:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[WIDE_LOAD4]])
-; CHECK-NEXT: [[TMP17]] = select <32 x i1> [[TMP9]], <32 x i8> [[TMP14]], <32 x i8> [[VEC_PHI1]]
-; CHECK-NEXT: [[TMP23:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[WIDE_LOAD5]])
-; CHECK-NEXT: [[TMP18]] = select <32 x i1> [[TMP10]], <32 x i8> [[TMP23]], <32 x i8> [[VEC_PHI2]]
-; CHECK-NEXT: [[TMP24:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[WIDE_LOAD6]])
-; CHECK-NEXT: [[TMP19]] = select <32 x i1> [[TMP11]], <32 x i8> [[TMP24]], <32 x i8> [[VEC_PHI3]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
+; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
+; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
+; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]