[llvm] de2b6cb - [InstCombine] Fold icmp over select of cmp more aggressively (#105536)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 22 00:47:39 PDT 2024


Author: Nikita Popov
Date: 2024-08-22T09:47:35+02:00
New Revision: de2b6cb6ab6472a13c68ddcd963aa2f25e298772

URL: https://github.com/llvm/llvm-project/commit/de2b6cb6ab6472a13c68ddcd963aa2f25e298772
DIFF: https://github.com/llvm/llvm-project/commit/de2b6cb6ab6472a13c68ddcd963aa2f25e298772.diff

LOG: [InstCombine] Fold icmp over select of cmp more aggressively (#105536)

When folding an icmp into a select, treat the comparison of a one-use
ucmp/scmp intrinsic with a constant as a simplification, because such a
comparison will itself fold down to a plain icmp.

This addresses a regression seen in Rust and also in llvm-opt-benchmark.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/select-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 34c9e0fde4f428..8e8d472a5df1d3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4209,6 +4209,14 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
   if (Op2)
     CI = dyn_cast<ConstantInt>(Op2);
 
+  auto Simplifies = [&](Value *Op, unsigned Idx) {
+    // A comparison of ucmp/scmp with a constant will fold into an icmp.
+    const APInt *Dummy;
+    return Op ||
+           (isa<CmpIntrinsic>(SI->getOperand(Idx)) &&
+            SI->getOperand(Idx)->hasOneUse() && match(RHS, m_APInt(Dummy)));
+  };
+
   // We only want to perform this transformation if it will not lead to
   // additional code. This is true if either both sides of the select
   // fold to a constant (in which case the icmp is replaced with a select
@@ -4219,7 +4227,7 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
   bool Transform = false;
   if (Op1 && Op2)
     Transform = true;
-  else if (Op1 || Op2) {
+  else if (Simplifies(Op1, 1) || Simplifies(Op2, 2)) {
     // Local case
     if (SI->hasOneUse())
       Transform = true;

diff  --git a/llvm/test/Transforms/InstCombine/select-cmp.ll b/llvm/test/Transforms/InstCombine/select-cmp.ll
index 697010b90db584..234815949d77d4 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp.ll
@@ -482,10 +482,9 @@ define i1 @test_select_inverse_nonconst4(i64 %x, i64 %y, i64 %z, i1 %cond) {
 
 define i1 @sel_icmp_two_cmp(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 ; CHECK-LABEL: @sel_icmp_two_cmp(
-; CHECK-NEXT:    [[V1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT:    [[V2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A3:%.*]], i32 [[A4:%.*]])
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V1]], i8 [[V2]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[A3:%.*]], [[A4:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %v1 = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -498,10 +497,10 @@ define i1 @sel_icmp_two_cmp(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 define i1 @sel_icmp_two_cmp_extra_use1(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 ; CHECK-LABEL: @sel_icmp_two_cmp_extra_use1(
 ; CHECK-NEXT:    [[V1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT:    [[V2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A3:%.*]], i32 [[A4:%.*]])
 ; CHECK-NEXT:    call void @use.i8(i8 [[V1]])
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V1]], i8 [[V2]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ule i32 [[A1]], [[A2]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[A3:%.*]], [[A4:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %v1 = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -544,6 +543,35 @@ define i1 @sel_icmp_two_cmp_not_const(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4,
   ret i1 %cmp
 }
 
+define <2 x i1> @sel_icmp_two_cmp_vec(i1 %c, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x i32> %a4) {
+; CHECK-LABEL: @sel_icmp_two_cmp_vec(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ule <2 x i32> [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle <2 x i32> [[A3:%.*]], [[A4:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = select i1 [[C:%.*]], <2 x i1> [[CMP1]], <2 x i1> [[CMP2]]
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %v1 = call <2 x i8> @llvm.ucmp(<2 x i32> %a1, <2 x i32> %a2)
+  %v2 = call <2 x i8> @llvm.scmp(<2 x i32> %a3, <2 x i32> %a4)
+  %sel = select i1 %c, <2 x i8> %v1, <2 x i8> %v2
+  %cmp = icmp sle <2 x i8> %sel, zeroinitializer
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sel_icmp_two_cmp_vec_nonsplat(i1 %c, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x i32> %a4) {
+; CHECK-LABEL: @sel_icmp_two_cmp_vec_nonsplat(
+; CHECK-NEXT:    [[V1:%.*]] = call <2 x i8> @llvm.ucmp.v2i8.v2i32(<2 x i32> [[A1:%.*]], <2 x i32> [[A2:%.*]])
+; CHECK-NEXT:    [[V2:%.*]] = call <2 x i8> @llvm.scmp.v2i8.v2i32(<2 x i32> [[A3:%.*]], <2 x i32> [[A4:%.*]])
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], <2 x i8> [[V1]], <2 x i8> [[V2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[SEL]], <i8 1, i8 2>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %v1 = call <2 x i8> @llvm.ucmp(<2 x i32> %a1, <2 x i32> %a2)
+  %v2 = call <2 x i8> @llvm.scmp(<2 x i32> %a3, <2 x i32> %a4)
+  %sel = select i1 %c, <2 x i8> %v1, <2 x i8> %v2
+  %cmp = icmp sle <2 x i8> %sel, <i8 0, i8 1>
+  ret <2 x i1> %cmp
+}
+
 define i1 @sel_icmp_cmp_and_simplify(i1 %c, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: @sel_icmp_cmp_and_simplify(
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
@@ -559,9 +587,9 @@ define i1 @sel_icmp_cmp_and_simplify(i1 %c, i32 %a1, i32 %a2) {
 
 define i1 @sel_icmp_cmp_and_no_simplify(i1 %c, i32 %a1, i32 %a2, i8 %b) {
 ; CHECK-LABEL: @sel_icmp_cmp_and_no_simplify(
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V]], i8 [[B:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i8 [[B:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %v = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -572,9 +600,9 @@ define i1 @sel_icmp_cmp_and_no_simplify(i1 %c, i32 %a1, i32 %a2, i8 %b) {
 
 define i1 @sel_icmp_cmp_and_no_simplify_comm(i1 %c, i32 %a1, i32 %a2, i8 %b) {
 ; CHECK-LABEL: @sel_icmp_cmp_and_no_simplify_comm(
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[B:%.*]], i8 [[V]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i8 [[B:%.*]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %v = call i8 @llvm.ucmp(i32 %a1, i32 %a2)


        


More information about the llvm-commits mailing list