[llvm] ff36411 - [InstCombine] Use zext's nneg flag for icmp folding (#70845)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 12 08:53:58 PST 2023


Author: LĂ©onard Oest O'Leary
Date: 2023-11-13T00:53:53+08:00
New Revision: ff36411b23e55d71a45088c30540e228e9ca6973

URL: https://github.com/llvm/llvm-project/commit/ff36411b23e55d71a45088c30540e228e9ca6973
DIFF: https://github.com/llvm/llvm-project/commit/ff36411b23e55d71a45088c30540e228e9ca6973.diff

LOG: [InstCombine] Use zext's nneg flag for icmp folding (#70845)

This PR fixes https://github.com/llvm/llvm-project/issues/55013 : the
max intrinsics is not generated for this simple loop case :
https://godbolt.org/z/hxz1xhMPh. This is caused by a ICMP not being
folded into a select, thus not generating the max intrinsics.

For the story :

Since LLVM 14, SCCP pass got smarter by folding sext into zext for
positive ranges : https://reviews.llvm.org/D81756. After this change,
InstCombine was sometimes unable to fold ICMP correctly as both of the
arguments pointed to mismatched zext/sext. To fix this, @rotateright
implemented this fix : https://reviews.llvm.org/D124419 that tries to
resolve the mismatch by knowing if the argument of a zext is positive
(in which case, it is like a sext) by using ValueTracking, however
ValueTracking is not smart enough to infer that the value is positive in
some cases. Recently, @nikic implemented #67982 which keeps the
information that a zext is non-negative. This PR simply uses this
information to do the folding accordingly.

TLDR : This PR uses the recent nneg tag on zext to fold the icmp
accordingly in instcombine.

This PR also contains test cases for sext/zext folding with InstCombine
as well as a x86 regression tests for the max/min case.

Added: 
    llvm/test/Transforms/PhaseOrdering/min_max_loop.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/icmp-ext-ext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c2ad92f919a3cc..03bc0f16c8938ae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5554,11 +5554,16 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
         return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
                             Constant::getNullValue(X->getType()));
 
-      // If we have mismatched casts, treat the zext of a non-negative source as
-      // a sext to simulate matching casts. Otherwise, we are done.
-      // TODO: Can we handle some predicates (equality) without non-negative?
-      if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
-          (IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
+      // If we have mismatched casts and zext has the nneg flag, we can
+      //  treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit.
+
+      auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0));
+      auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1));
+
+      bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg();
+      bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg();
+
+      if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1))
         IsSignedExt = true;
       else
         return nullptr;

diff  --git a/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll b/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
index f70e48e27384619..7fc42c65d758b3e 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
@@ -130,6 +130,17 @@ define i1 @zext_sext_sgt(i8 %x, i8 %y) {
   ret i1 %c
 }
 
+define i1 @zext_nneg_sext_sgt(i8 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_sgt(
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = zext nneg i8 %x to i32
+  %b = sext i8 %y to i32
+  %c = icmp sgt i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @zext_sext_ugt(i8 %x, i8 %y) {
 ; CHECK-LABEL: @zext_sext_ugt(
 ; CHECK-NEXT:    [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -143,6 +154,18 @@ define i1 @zext_sext_ugt(i8 %x, i8 %y) {
   ret i1 %c
 }
 
+
+define i1 @zext_nneg_sext_ugt(i8 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_ugt(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = zext nneg i8 %x to i32
+  %b = sext i8 %y to i32
+  %c = icmp ugt i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @zext_sext_eq(i8 %x, i8 %y) {
 ; CHECK-LABEL: @zext_sext_eq(
 ; CHECK-NEXT:    [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -156,6 +179,18 @@ define i1 @zext_sext_eq(i8 %x, i8 %y) {
   ret i1 %c
 }
 
+define i1 @zext_nneg_sext_eq(i8 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_eq(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = zext nneg i8 %x to i32
+  %b = sext i8 %y to i32
+  %c = icmp eq i32 %a, %b
+  ret i1 %c
+}
+
+
 define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
 ; CHECK-LABEL: @zext_sext_sle_op0_narrow(
 ; CHECK-NEXT:    [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -169,6 +204,19 @@ define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
   ret i1 %c
 }
 
+
+define i1 @zext_nneg_sext_sle_op0_narrow(i8 %x, i16 %y) {
+; CHECK-LABEL: @zext_nneg_sext_sle_op0_narrow(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[C:%.*]] = icmp sle i16 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = zext nneg i8 %x to i32
+  %b = sext i16 %y to i32
+  %c = icmp sle i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
 ; CHECK-LABEL: @zext_sext_ule_op0_wide(
 ; CHECK-NEXT:    [[A:%.*]] = zext i9 [[X:%.*]] to i32
@@ -182,6 +230,18 @@ define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
   ret i1 %c
 }
 
+define i1 @zext_nneg_sext_ule_op0_wide(i9 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_ule_op0_wide(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i9
+; CHECK-NEXT:    [[C:%.*]] = icmp uge i9 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = zext nneg i9 %x to i32
+  %b = sext i8 %y to i32
+  %c = icmp ule i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @sext_zext_slt(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sext_zext_slt(
 ; CHECK-NEXT:    [[A:%.*]] = sext i8 [[X:%.*]] to i32
@@ -195,6 +255,18 @@ define i1 @sext_zext_slt(i8 %x, i8 %y) {
   ret i1 %c
 }
 
+
+define i1 @sext_zext_nneg_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_slt(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = sext i8 %x to i32
+  %b = zext nneg i8 %y to i32
+  %c = icmp slt i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @sext_zext_ult(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sext_zext_ult(
 ; CHECK-NEXT:    [[A:%.*]] = sext i8 [[X:%.*]] to i32
@@ -208,6 +280,17 @@ define i1 @sext_zext_ult(i8 %x, i8 %y) {
   ret i1 %c
 }
 
+define i1 @sext_zext_nneg_ult(i8 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_ult(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = sext i8 %x to i32
+  %b = zext nneg i8 %y to i32
+  %c = icmp ult i32 %a, %b
+  ret i1 %c
+}
+
 define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sext_zext_ne(
 ; CHECK-NEXT:    [[A:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -221,6 +304,18 @@ define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i1> %c
 }
 
+
+define <2 x i1> @sext_zext_nneg_ne(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sext_zext_nneg_ne(
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %a = sext <2 x i8> %x to <2 x i32>
+  %b = zext nneg <2 x i8> %y to <2 x i32>
+  %c = icmp ne <2 x i32> %a, %b
+  ret <2 x i1> %c
+}
+
 define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
 ; CHECK-LABEL: @sext_zext_sge_op0_narrow(
 ; CHECK-NEXT:    [[A:%.*]] = sext i5 [[X:%.*]] to i32
@@ -234,6 +329,19 @@ define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
   ret i1 %c
 }
 
+
+define i1 @sext_zext_nneg_sge_op0_narrow(i5 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_sge_op0_narrow(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i5 [[X:%.*]] to i8
+; CHECK-NEXT:    [[C:%.*]] = icmp sge i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = sext i5 %x to i32
+  %b = zext nneg i8 %y to i32
+  %c = icmp sge i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
 ; CHECK-LABEL: @sext_zext_uge_op0_wide(
 ; CHECK-NEXT:    [[A:%.*]] = sext i16 [[X:%.*]] to i32
@@ -247,6 +355,19 @@ define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
   ret i1 %c
 }
 
+
+define i1 @sext_zext_nneg_uge_op0_wide(i16 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_uge_op0_wide(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i16
+; CHECK-NEXT:    [[C:%.*]] = icmp ule i16 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = sext i16 %x to i32
+  %b = zext nneg i8 %y to i32
+  %c = icmp uge i32 %a, %b
+  ret i1 %c
+}
+
 define i1 @zext_sext_sgt_known_nonneg(i8 %x, i8 %y) {
 ; CHECK-LABEL: @zext_sext_sgt_known_nonneg(
 ; CHECK-NEXT:    [[N:%.*]] = udiv i8 127, [[X:%.*]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll
new file mode 100644
index 000000000000000..fb338a6507eba86
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll
@@ -0,0 +1,111 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; See issue #55013 and PR #70845 for more details.
+; This test comes from the following C program, compiled with clang
+;
+;; short vecreduce_smin_v2i16(int n, short* v)
+;; {
+;;   short p = 0;
+;;   for (int i = 0; i < n; ++i)
+;;     p = p > v[i] ? v[i] : p;
+;;   return p;
+;; }
+;
+;; short vecreduce_smax_v2i16(int n, short* v)
+;; {
+;;   short p = 0;
+;;   for (int i = 0; i < n; ++i)
+;;     p = p < v[i] ? v[i] : p;
+;;   return p;
+;; }
+
+define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) {
+; CHECK-LABEL: define i16 @vecreduce_smin_v2i16(
+; CHECK:    @llvm.smin.v2i16
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %conv = sext i16 %p.0 to i32
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv1 = sext i16 %0 to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  %idxprom4 = sext i32 %i.0 to i64
+  %arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
+  %1 = load i16, ptr %arrayidx5, align 2
+  %conv6 = sext i16 %1 to i32
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  %conv7 = sext i16 %p.0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
+  %conv8 = trunc i32 %cond to i16
+  br label %for.inc
+
+for.inc:                                          ; preds = %cond.end
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i16 %p.0
+}
+
+define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) {
+; CHECK-LABEL: define i16 @vecreduce_smax_v2i16(
+; CHECK:  @llvm.smax.v2i16
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %conv = sext i16 %p.0 to i32
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv1 = sext i16 %0 to i32
+  %cmp2 = icmp slt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  %idxprom4 = sext i32 %i.0 to i64
+  %arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
+  %1 = load i16, ptr %arrayidx5, align 2
+  %conv6 = sext i16 %1 to i32
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  %conv7 = sext i16 %p.0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
+  %conv8 = trunc i32 %cond to i16
+  br label %for.inc
+
+for.inc:                                          ; preds = %cond.end
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i16 %p.0
+}


        


More information about the llvm-commits mailing list