[llvm] de2b6cb - [InstCombine] Fold icmp over select of cmp more aggressively (#105536)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 00:47:39 PDT 2024
Author: Nikita Popov
Date: 2024-08-22T09:47:35+02:00
New Revision: de2b6cb6ab6472a13c68ddcd963aa2f25e298772
URL: https://github.com/llvm/llvm-project/commit/de2b6cb6ab6472a13c68ddcd963aa2f25e298772
DIFF: https://github.com/llvm/llvm-project/commit/de2b6cb6ab6472a13c68ddcd963aa2f25e298772.diff
LOG: [InstCombine] Fold icmp over select of cmp more aggressively (#105536)
When folding an icmp into a select, treat an icmp of a constant with a
one-use ucmp/scmp intrinsic as a simplification. These comparisons will
reduce down to an icmp.
This addresses a regression seen in Rust and also in llvm-opt-benchmark.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/select-cmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 34c9e0fde4f428..8e8d472a5df1d3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4209,6 +4209,14 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
if (Op2)
CI = dyn_cast<ConstantInt>(Op2);
+ auto Simplifies = [&](Value *Op, unsigned Idx) {
+ // A comparison of ucmp/scmp with a constant will fold into an icmp.
+ const APInt *Dummy;
+ return Op ||
+ (isa<CmpIntrinsic>(SI->getOperand(Idx)) &&
+ SI->getOperand(Idx)->hasOneUse() && match(RHS, m_APInt(Dummy)));
+ };
+
// We only want to perform this transformation if it will not lead to
// additional code. This is true if either both sides of the select
// fold to a constant (in which case the icmp is replaced with a select
@@ -4219,7 +4227,7 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
bool Transform = false;
if (Op1 && Op2)
Transform = true;
- else if (Op1 || Op2) {
+ else if (Simplifies(Op1, 1) || Simplifies(Op2, 2)) {
// Local case
if (SI->hasOneUse())
Transform = true;
diff --git a/llvm/test/Transforms/InstCombine/select-cmp.ll b/llvm/test/Transforms/InstCombine/select-cmp.ll
index 697010b90db584..234815949d77d4 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp.ll
@@ -482,10 +482,9 @@ define i1 @test_select_inverse_nonconst4(i64 %x, i64 %y, i64 %z, i1 %cond) {
define i1 @sel_icmp_two_cmp(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; CHECK-LABEL: @sel_icmp_two_cmp(
-; CHECK-NEXT: [[V1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT: [[V2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A3:%.*]], i32 [[A4:%.*]])
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V1]], i8 [[V2]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[A3:%.*]], [[A4:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%v1 = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -498,10 +497,10 @@ define i1 @sel_icmp_two_cmp(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
define i1 @sel_icmp_two_cmp_extra_use1(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; CHECK-LABEL: @sel_icmp_two_cmp_extra_use1(
; CHECK-NEXT: [[V1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT: [[V2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A3:%.*]], i32 [[A4:%.*]])
; CHECK-NEXT: call void @use.i8(i8 [[V1]])
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V1]], i8 [[V2]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1]], [[A2]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[A3:%.*]], [[A4:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%v1 = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -544,6 +543,35 @@ define i1 @sel_icmp_two_cmp_not_const(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4,
ret i1 %cmp
}
+define <2 x i1> @sel_icmp_two_cmp_vec(i1 %c, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x i32> %a4) {
+; CHECK-LABEL: @sel_icmp_two_cmp_vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ule <2 x i32> [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle <2 x i32> [[A3:%.*]], [[A4:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], <2 x i1> [[CMP1]], <2 x i1> [[CMP2]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %v1 = call <2 x i8> @llvm.ucmp(<2 x i32> %a1, <2 x i32> %a2)
+ %v2 = call <2 x i8> @llvm.scmp(<2 x i32> %a3, <2 x i32> %a4)
+ %sel = select i1 %c, <2 x i8> %v1, <2 x i8> %v2
+ %cmp = icmp sle <2 x i8> %sel, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sel_icmp_two_cmp_vec_nonsplat(i1 %c, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x i32> %a4) {
+; CHECK-LABEL: @sel_icmp_two_cmp_vec_nonsplat(
+; CHECK-NEXT: [[V1:%.*]] = call <2 x i8> @llvm.ucmp.v2i8.v2i32(<2 x i32> [[A1:%.*]], <2 x i32> [[A2:%.*]])
+; CHECK-NEXT: [[V2:%.*]] = call <2 x i8> @llvm.scmp.v2i8.v2i32(<2 x i32> [[A3:%.*]], <2 x i32> [[A4:%.*]])
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], <2 x i8> [[V1]], <2 x i8> [[V2]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SEL]], <i8 1, i8 2>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %v1 = call <2 x i8> @llvm.ucmp(<2 x i32> %a1, <2 x i32> %a2)
+ %v2 = call <2 x i8> @llvm.scmp(<2 x i32> %a3, <2 x i32> %a4)
+ %sel = select i1 %c, <2 x i8> %v1, <2 x i8> %v2
+ %cmp = icmp sle <2 x i8> %sel, <i8 0, i8 1>
+ ret <2 x i1> %cmp
+}
+
define i1 @sel_icmp_cmp_and_simplify(i1 %c, i32 %a1, i32 %a2) {
; CHECK-LABEL: @sel_icmp_cmp_and_simplify(
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
@@ -559,9 +587,9 @@ define i1 @sel_icmp_cmp_and_simplify(i1 %c, i32 %a1, i32 %a2) {
define i1 @sel_icmp_cmp_and_no_simplify(i1 %c, i32 %a1, i32 %a2, i8 %b) {
; CHECK-LABEL: @sel_icmp_cmp_and_no_simplify(
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V]], i8 [[B:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[B:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%v = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -572,9 +600,9 @@ define i1 @sel_icmp_cmp_and_no_simplify(i1 %c, i32 %a1, i32 %a2, i8 %b) {
define i1 @sel_icmp_cmp_and_no_simplify_comm(i1 %c, i32 %a1, i32 %a2, i8 %b) {
; CHECK-LABEL: @sel_icmp_cmp_and_no_simplify_comm(
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[B:%.*]], i8 [[V]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[B:%.*]], 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%v = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
More information about the llvm-commits mailing list