[llvm] [InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm (PR #143020)
Konstantin Bogdanov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 03:49:44 PDT 2025
https://github.com/thevar1able updated https://github.com/llvm/llvm-project/pull/143020
From 084efd92871d5ac2f7dace668cb43986954defc4 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 20:47:32 +0200
Subject: [PATCH 01/13] Prototype fix
---
.../InstCombine/InstCombineCalls.cpp | 19 +++++++++++++++++++
llvm/test/Transforms/InstCombine/select.ll | 15 +++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..930819d24393a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1654,6 +1654,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
+ if (Function *F = CI.getCalledFunction()) {
+ if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) {
+ for (Value *Arg : CI.args()) {
+ auto *SI = dyn_cast<SelectInst>(Arg);
+ if (!SI)
+ continue;
+
+ auto *TrueC = dyn_cast<Constant>(SI->getTrueValue());
+ auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
+
+ // Block only if the select is masking, e.g. select(cond, val, -1)
+ if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) {
+ LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n");
+ return nullptr;
+ }
+ }
+ }
+ }
+
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index e16f6ad2cfc9b..09cb84cde07ca 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5047,3 +5047,18 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
%sel = select i1 %cond, <2 x ptr> %y, <2 x ptr> %freeze
ret <2 x ptr> %sel
}
+
+declare i8 @llvm.umin.i8(i8, i8)
+
+define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; CHECK-LABEL: @no_fold_masked_min(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
+; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %cond = icmp eq i8 %mask, 0
+ %masked_val = select i1 %cond, i8 %val, i8 -1
+ %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+ ret i8 %res
+}
From 1b3e66224eef8f475e4f6cf60865b2b10901a33d Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 21:03:53 +0200
Subject: [PATCH 02/13] Fix formatting
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 930819d24393a..0e5c95c7445dd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1655,7 +1655,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return visitFree(CI, FreedOp);
if (Function *F = CI.getCalledFunction()) {
- if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) {
+ if (F->getIntrinsicID() == Intrinsic::umin ||
+ F->getIntrinsicID() == Intrinsic::umax) {
for (Value *Arg : CI.args()) {
auto *SI = dyn_cast<SelectInst>(Arg);
if (!SI)
@@ -1665,8 +1666,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
// Block only if the select is masking, e.g. select(cond, val, -1)
- if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) {
- LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n");
+ if ((TrueC && TrueC->isAllOnesValue()) ||
+ (FalseC && FalseC->isAllOnesValue())) {
+ LLVM_DEBUG(
+ dbgs()
+ << "InstCombine: skipping umin/umax folding for masked select\n");
return nullptr;
}
}
From d87b193e4832ad2984df5a72d1e2aeda614662bc Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 23:04:05 +0200
Subject: [PATCH 03/13] Move checks to FoldOpIntoSelect
---
.../InstCombine/InstCombineCalls.cpp | 23 -------------------
.../InstCombine/InstructionCombining.cpp | 19 +++++++++++++++
2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0e5c95c7445dd..cfb4af391b540 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1654,29 +1654,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
- if (Function *F = CI.getCalledFunction()) {
- if (F->getIntrinsicID() == Intrinsic::umin ||
- F->getIntrinsicID() == Intrinsic::umax) {
- for (Value *Arg : CI.args()) {
- auto *SI = dyn_cast<SelectInst>(Arg);
- if (!SI)
- continue;
-
- auto *TrueC = dyn_cast<Constant>(SI->getTrueValue());
- auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
-
- // Block only if the select is masking, e.g. select(cond, val, -1)
- if ((TrueC && TrueC->isAllOnesValue()) ||
- (FalseC && FalseC->isAllOnesValue())) {
- LLVM_DEBUG(
- dbgs()
- << "InstCombine: skipping umin/umax folding for masked select\n");
- return nullptr;
- }
- }
- }
- }
-
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 439a86d951a83..29211b4ac0ad1 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1719,6 +1719,25 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
+ if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
+ return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+ }
+
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
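
For context, the check added here short-circuits FoldOpIntoSelect, which would otherwise fold the min/max intrinsic into both arms of the select and then simplify the constant arm away. A rough IR sketch of the rewrite being suppressed (illustrative function and value names, modeled on the no_fold_masked_min test from the first patch; umin(%acc, -1) folds to %acc):

  declare i8 @llvm.umin.i8(i8, i8)

  ; before the fold: the select masks %val with -1, the identity for umin
  define i8 @masked_min_before(i8 %acc, i8 %val, i1 %cond) {
    %masked = select i1 %cond, i8 %val, i8 -1
    %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked)
    ret i8 %res
  }

  ; after the fold: umin is applied in each arm, and umin(%acc, -1) becomes %acc
  define i8 @masked_min_after(i8 %acc, i8 %val, i1 %cond) {
    %min = call i8 @llvm.umin.i8(i8 %acc, i8 %val)
    %res = select i1 %cond, i8 %min, i8 %acc
    ret i8 %res
  }
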
From 69eaf81f28214760c080296c7aed23250668a6b9 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 5 Jun 2025 23:13:30 +0200
Subject: [PATCH 04/13] Fix formatting
---
.../InstCombine/InstructionCombining.cpp | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 29211b4ac0ad1..53efd629ea19d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1721,20 +1721,20 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
+ return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
}
}
From 8240c6c8eedb3a499e4f3f0c457fd847e86da10e Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Sat, 7 Jun 2025 08:07:36 +0200
Subject: [PATCH 05/13] Apply suggested fix
---
.../InstCombine/InstructionCombining.cpp | 24 ++++------------
llvm/test/Transforms/InstCombine/select.ll | 28 +++++++++----------
2 files changed, 20 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 53efd629ea19d..23a3c87a640e1 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1719,24 +1719,12 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
- if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
- }
- }
+ if (isa<MinMaxIntrinsic>(&Op))
+ for (Value *IntrinOp : Op.operands())
+ if (auto *PN = dyn_cast<PHINode>(IntrinOp))
+ for (Value *PhiOp : PN->operands())
+ if (PhiOp == &Op)
+ return nullptr;
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 09cb84cde07ca..13464187e6e81 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5048,17 +5048,17 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
ret <2 x ptr> %sel
}
-declare i8 @llvm.umin.i8(i8, i8)
-
-define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
-; CHECK-LABEL: @no_fold_masked_min(
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
-; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
-; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
-; CHECK-NEXT: ret i8 [[RES]]
-;
- %cond = icmp eq i8 %mask, 0
- %masked_val = select i1 %cond, i8 %val, i8 -1
- %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
- ret i8 %res
-}
+; declare i8 @llvm.umin.i8(i8, i8)
+;
+; define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; ; CHECK-LABEL: @no_fold_masked_min(
+; ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; ; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
+; ; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
+; ; CHECK-NEXT: ret i8 [[RES]]
+; ;
+; %cond = icmp eq i8 %mask, 0
+; %masked_val = select i1 %cond, i8 %val, i8 -1
+; %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+; ret i8 %res
+; }
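
The suggested fix replaces the constant matching with a structural check: roughly, bail out when the min/max intrinsic participates in a PHI cycle, i.e. one of its operands is a PHI node whose incoming values include the intrinsic itself, which is the shape of a loop reduction. A minimal IR sketch of such a cycle (illustrative names; the full masked-loop test is added in a later patch):

  declare i8 @llvm.umin.i8(i8, i8)

  define i8 @umin_reduction_sketch(ptr %p, i64 %n) {
  entry:
    br label %loop

  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    ; %acc takes %res on the back edge ...
    %acc = phi i8 [ -1, %entry ], [ %res, %loop ]
    %q = getelementptr inbounds i8, ptr %p, i64 %i
    %v = load i8, ptr %q, align 1
    ; ... and %res reads %acc, closing the reduction cycle the new check looks for
    %res = call i8 @llvm.umin.i8(i8 %acc, i8 %v)
    %i.next = add i64 %i, 1
    %done = icmp eq i64 %i.next, %n
    br i1 %done, label %exit, label %loop

  exit:
    ret i8 %res
  }
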
From 8a7663b66339bfd3cf59bbbeb488b1dffd0fb533 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 22:29:22 +0200
Subject: [PATCH 06/13] Update expected output
---
.../PhaseOrdering/X86/vector-reductions.ll | 48 +++++++++++++++----
1 file changed, 38 insertions(+), 10 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index f8450766037b2..8ee3345a963a9 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -332,20 +332,48 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-LABEL: @masked_min_reduction(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: loop:
+; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DATA]], i64 96
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[DATA]], align 1
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]])
-; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
+; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
+; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
+; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
-; CHECK: exit:
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[RDX_MINMAX:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP16]], <32 x i8> [[TMP17]])
+; CHECK-NEXT: [[RDX_MINMAX11:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX]], <32 x i8> [[TMP18]])
+; CHECK-NEXT: [[RDX_MINMAX12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX11]], <32 x i8> [[TMP19]])
+; CHECK-NEXT: [[TMP21:%.*]] = tail call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> [[RDX_MINMAX12]])
; CHECK-NEXT: ret i8 [[TMP21]]
;
entry:
From 7929ed17610d2ff709392b9f3124f00ad274bede Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 22:58:08 +0200
Subject: [PATCH 07/13] Add a comment
---
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 23a3c87a640e1..291c738e94b85 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1719,6 +1719,8 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (SI->getType()->isIntOrIntVectorTy(1))
return nullptr;
+ // Avoid breaking min/max reduction pattern,
+ // which is necessary for vectorization later.
if (isa<MinMaxIntrinsic>(&Op))
for (Value *IntrinOp : Op.operands())
if (auto *PN = dyn_cast<PHINode>(IntrinOp))
From 22ef2528d9299e9b02ef5b5535f6c8900bdf377d Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:00:38 +0200
Subject: [PATCH 08/13] Add a test
---
llvm/test/Transforms/InstCombine/select.ll | 76 +++++++++++++++----
.../PhaseOrdering/X86/vector-reductions.ll | 2 -
2 files changed, 62 insertions(+), 16 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 13464187e6e81..937a5f0360a1d 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5048,17 +5048,65 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
ret <2 x ptr> %sel
}
-; declare i8 @llvm.umin.i8(i8, i8)
-;
-; define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
-; ; CHECK-LABEL: @no_fold_masked_min(
-; ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
-; ; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
-; ; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
-; ; CHECK-NEXT: ret i8 [[RES]]
-; ;
-; %cond = icmp eq i8 %mask, 0
-; %masked_val = select i1 %cond, i8 %val, i8 -1
-; %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
-; ret i8 %res
-; }
+declare i8 @llvm.umin.i8(i8, i8)
+
+define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; CHECK-LABEL: @no_fold_masked_min(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL:%.*]], i8 -1
+; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %cond = icmp eq i8 %mask, 0
+ %masked_val = select i1 %cond, i8 %val, i8 -1
+ %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+ ret i8 %res
+}
+
+define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) {
+; CHECK-LABEL: @no_fold_masked_min_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[RES:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[VALS:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[MASK_PTR:%.*]] = getelementptr inbounds i8, ptr [[MASKS:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[VAL_PTR]], align 1
+; CHECK-NEXT: [[MASK:%.*]] = load i8, ptr [[MASK_PTR]], align 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK]], 0
+; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL]], i8 -1
+; CHECK-NEXT: [[RES]] = call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[MASKED_VAL]])
+; CHECK-NEXT: [[NEXT_INDEX]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXT_INDEX]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [0, %entry], [%next_index, %loop]
+ %acc = phi i8 [255, %entry], [%res, %loop]
+
+ %val_ptr = getelementptr inbounds i8, ptr %vals, i64 %index
+ %mask_ptr = getelementptr inbounds i8, ptr %masks, i64 %index
+
+ %val = load i8, ptr %val_ptr, align 1
+ %mask = load i8, ptr %mask_ptr, align 1
+
+ %cond = icmp eq i8 %mask, 0
+ %masked_val = select i1 %cond, i8 %val, i8 -1
+ %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+
+ %next_index = add i64 %index, 1
+ %done = icmp eq i64 %next_index, %n
+ br i1 %done, label %exit, label %loop
+
+exit:
+ store i8 %res, ptr %out, align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 8ee3345a963a9..5e679e692fd8d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -326,8 +326,6 @@ cleanup:
ret i1 %retval.0
}
-; From https://github.com/llvm/llvm-project/issues/139050.
-; FIXME: This should be vectorized.
define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-LABEL: @masked_min_reduction(
; CHECK-NEXT: entry:
From d424540814671d5cd83feed87fddcf467860119f Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:30:21 +0200
Subject: [PATCH 09/13] Update tests assertions
---
llvm/test/Transforms/InstCombine/select.ll | 1 -
.../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++++--------
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 937a5f0360a1d..67143bfe8f65c 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5084,7 +5084,6 @@ define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture
; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
;
-
entry:
br label %loop
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 5e679e692fd8d..2ec48a8637dae 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -332,10 +332,10 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64
@@ -356,10 +356,10 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1)
; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
From 304cf20e9def8081c7234f7d3552e4d42e8504e1 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:44:41 +0200
Subject: [PATCH 10/13] Revert suggested fix
---
.../InstCombine/InstructionCombining.cpp | 22 ++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 291c738e94b85..fa79b12b47bff 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1721,12 +1721,24 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
// Avoid breaking min/max reduction pattern,
// which is necessary for vectorization later.
- if (isa<MinMaxIntrinsic>(&Op))
- for (Value *IntrinOp : Op.operands())
- if (auto *PN = dyn_cast<PHINode>(IntrinOp))
- for (Value *PhiOp : PN->operands())
- if (PhiOp == &Op)
+ if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+ }
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
From 1165581c55aaf2a089e486665d95bc30c28ce0c2 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Wed, 11 Jun 2025 23:46:02 +0200
Subject: [PATCH 11/13] Update tests assertions
---
.../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 2ec48a8637dae..45632e878021e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -356,14 +356,14 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1)
-; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
-; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
-; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
-; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]])
+; CHECK-NEXT: [[TMP12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP16]] = select <32 x i1> [[TMP8]], <32 x i8> [[TMP12]], <32 x i8> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[WIDE_LOAD4]])
+; CHECK-NEXT: [[TMP17]] = select <32 x i1> [[TMP9]], <32 x i8> [[TMP14]], <32 x i8> [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP23:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[WIDE_LOAD5]])
+; CHECK-NEXT: [[TMP18]] = select <32 x i1> [[TMP10]], <32 x i8> [[TMP23]], <32 x i8> [[VEC_PHI2]]
+; CHECK-NEXT: [[TMP24:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[WIDE_LOAD6]])
+; CHECK-NEXT: [[TMP19]] = select <32 x i1> [[TMP11]], <32 x i8> [[TMP24]], <32 x i8> [[VEC_PHI3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
From 0d48fc43c52bd59ec26bbe717433dc4e9bc6ec21 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Thu, 12 Jun 2025 00:08:49 +0200
Subject: [PATCH 12/13] Reformat
---
.../InstCombine/InstructionCombining.cpp | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index fa79b12b47bff..b51328f111b94 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1723,20 +1723,20 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
// which is necessary for vectorization later.
if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
+ case Intrinsic::umin:
+ case Intrinsic::smin:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isAllOnesValue())
+ return nullptr;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::smax:
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
+ if (C->isZero())
+ return nullptr;
+ break;
+ default:
+ break;
}
}
From 22183ade298ed277eb555aebd0f64af398e94e9e Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <konstantin at clickhouse.com>
Date: Fri, 13 Jun 2025 12:49:18 +0200
Subject: [PATCH 13/13] Reapply suggested fix
---
.../InstCombine/InstructionCombining.cpp | 24 +++++--------------
llvm/test/Transforms/InstCombine/select.ll | 15 ------------
.../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++-------
3 files changed, 14 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index b51328f111b94..291c738e94b85 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1721,24 +1721,12 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
// Avoid breaking min/max reduction pattern,
// which is necessary for vectorization later.
- if (auto *II = dyn_cast<IntrinsicInst>(&Op)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::umin:
- case Intrinsic::smin:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isAllOnesValue())
- return nullptr;
- break;
- case Intrinsic::umax:
- case Intrinsic::smax:
- if (ConstantInt *C = dyn_cast<ConstantInt>(FV))
- if (C->isZero())
- return nullptr;
- break;
- default:
- break;
- }
- }
+ if (isa<MinMaxIntrinsic>(&Op))
+ for (Value *IntrinOp : Op.operands())
+ if (auto *PN = dyn_cast<PHINode>(IntrinOp))
+ for (Value *PhiOp : PN->operands())
+ if (PhiOp == &Op)
+ return nullptr;
// Test if a FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 67143bfe8f65c..ef5874ffd46ad 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5048,21 +5048,6 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
ret <2 x ptr> %sel
}
-declare i8 @llvm.umin.i8(i8, i8)
-
-define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
-; CHECK-LABEL: @no_fold_masked_min(
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
-; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL:%.*]], i8 -1
-; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL]])
-; CHECK-NEXT: ret i8 [[RES]]
-;
- %cond = icmp eq i8 %mask, 0
- %masked_val = select i1 %cond, i8 %val, i8 -1
- %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
- ret i8 %res
-}
-
define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) {
; CHECK-LABEL: @no_fold_masked_min_loop(
; CHECK-NEXT: entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index 45632e878021e..2ec48a8637dae 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -356,14 +356,14 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[WIDE_LOAD]])
-; CHECK-NEXT: [[TMP16]] = select <32 x i1> [[TMP8]], <32 x i8> [[TMP12]], <32 x i8> [[VEC_PHI]]
-; CHECK-NEXT: [[TMP14:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[WIDE_LOAD4]])
-; CHECK-NEXT: [[TMP17]] = select <32 x i1> [[TMP9]], <32 x i8> [[TMP14]], <32 x i8> [[VEC_PHI1]]
-; CHECK-NEXT: [[TMP23:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[WIDE_LOAD5]])
-; CHECK-NEXT: [[TMP18]] = select <32 x i1> [[TMP10]], <32 x i8> [[TMP23]], <32 x i8> [[VEC_PHI2]]
-; CHECK-NEXT: [[TMP24:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[WIDE_LOAD6]])
-; CHECK-NEXT: [[TMP19]] = select <32 x i1> [[TMP11]], <32 x i8> [[TMP24]], <32 x i8> [[VEC_PHI3]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1)
+; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]])
+; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]])
+; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]])
+; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]