[llvm] ff36411 - [InstCombine] Use zext's nneg flag for icmp folding (#70845)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 12 08:53:58 PST 2023
Author: LĂ©onard Oest O'Leary
Date: 2023-11-13T00:53:53+08:00
New Revision: ff36411b23e55d71a45088c30540e228e9ca6973
URL: https://github.com/llvm/llvm-project/commit/ff36411b23e55d71a45088c30540e228e9ca6973
DIFF: https://github.com/llvm/llvm-project/commit/ff36411b23e55d71a45088c30540e228e9ca6973.diff
LOG: [InstCombine] Use zext's nneg flag for icmp folding (#70845)
This PR fixes https://github.com/llvm/llvm-project/issues/55013 : the
max intrinsics is not generated for this simple loop case :
https://godbolt.org/z/hxz1xhMPh. This is caused by a ICMP not being
folded into a select, thus not generating the max intrinsics.
For the story :
Since LLVM 14, SCCP pass got smarter by folding sext into zext for
positive ranges : https://reviews.llvm.org/D81756. After this change,
InstCombine was sometimes unable to fold ICMP correctly as both of the
arguments pointed to mismatched zext/sext. To fix this, @rotateright
implemented this fix : https://reviews.llvm.org/D124419 that tries to
resolve the mismatch by knowing if the argument of a zext is positive
(in which case, it is like a sext) by using ValueTracking, however
ValueTracking is not smart enough to infer that the value is positive in
some cases. Recently, @nikic implemented #67982 which keeps the
information that a zext is non-negative. This PR simply uses this
information to do the folding accordingly.
TLDR : This PR uses the recent nneg tag on zext to fold the icmp
accordingly in instcombine.
This PR also contains test cases for sext/zext folding with InstCombine
as well as a x86 regression tests for the max/min case.
Added:
llvm/test/Transforms/PhaseOrdering/min_max_loop.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c2ad92f919a3cc..03bc0f16c8938ae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5554,11 +5554,16 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
Constant::getNullValue(X->getType()));
- // If we have mismatched casts, treat the zext of a non-negative source as
- // a sext to simulate matching casts. Otherwise, we are done.
- // TODO: Can we handle some predicates (equality) without non-negative?
- if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
- (IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
+ // If we have mismatched casts and zext has the nneg flag, we can
+ // treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit.
+
+ auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0));
+ auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1));
+
+ bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg();
+ bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg();
+
+ if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1))
IsSignedExt = true;
else
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll b/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
index f70e48e27384619..7fc42c65d758b3e 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ext-ext.ll
@@ -130,6 +130,17 @@ define i1 @zext_sext_sgt(i8 %x, i8 %y) {
ret i1 %c
}
+define i1 @zext_nneg_sext_sgt(i8 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_sgt(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext nneg i8 %x to i32
+ %b = sext i8 %y to i32
+ %c = icmp sgt i32 %a, %b
+ ret i1 %c
+}
+
define i1 @zext_sext_ugt(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_ugt(
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -143,6 +154,18 @@ define i1 @zext_sext_ugt(i8 %x, i8 %y) {
ret i1 %c
}
+
+define i1 @zext_nneg_sext_ugt(i8 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_ugt(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext nneg i8 %x to i32
+ %b = sext i8 %y to i32
+ %c = icmp ugt i32 %a, %b
+ ret i1 %c
+}
+
define i1 @zext_sext_eq(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_eq(
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -156,6 +179,18 @@ define i1 @zext_sext_eq(i8 %x, i8 %y) {
ret i1 %c
}
+define i1 @zext_nneg_sext_eq(i8 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_eq(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext nneg i8 %x to i32
+ %b = sext i8 %y to i32
+ %c = icmp eq i32 %a, %b
+ ret i1 %c
+}
+
+
define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
; CHECK-LABEL: @zext_sext_sle_op0_narrow(
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -169,6 +204,19 @@ define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
ret i1 %c
}
+
+define i1 @zext_nneg_sext_sle_op0_narrow(i8 %x, i16 %y) {
+; CHECK-LABEL: @zext_nneg_sext_sle_op0_narrow(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16
+; CHECK-NEXT: [[C:%.*]] = icmp sle i16 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext nneg i8 %x to i32
+ %b = sext i16 %y to i32
+ %c = icmp sle i32 %a, %b
+ ret i1 %c
+}
+
define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_ule_op0_wide(
; CHECK-NEXT: [[A:%.*]] = zext i9 [[X:%.*]] to i32
@@ -182,6 +230,18 @@ define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
ret i1 %c
}
+define i1 @zext_nneg_sext_ule_op0_wide(i9 %x, i8 %y) {
+; CHECK-LABEL: @zext_nneg_sext_ule_op0_wide(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i9
+; CHECK-NEXT: [[C:%.*]] = icmp uge i9 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext nneg i9 %x to i32
+ %b = sext i8 %y to i32
+ %c = icmp ule i32 %a, %b
+ ret i1 %c
+}
+
define i1 @sext_zext_slt(i8 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_slt(
; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i32
@@ -195,6 +255,18 @@ define i1 @sext_zext_slt(i8 %x, i8 %y) {
ret i1 %c
}
+
+define i1 @sext_zext_nneg_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_slt(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = sext i8 %x to i32
+ %b = zext nneg i8 %y to i32
+ %c = icmp slt i32 %a, %b
+ ret i1 %c
+}
+
define i1 @sext_zext_ult(i8 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_ult(
; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i32
@@ -208,6 +280,17 @@ define i1 @sext_zext_ult(i8 %x, i8 %y) {
ret i1 %c
}
+define i1 @sext_zext_nneg_ult(i8 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_ult(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = sext i8 %x to i32
+ %b = zext nneg i8 %y to i32
+ %c = icmp ult i32 %a, %b
+ ret i1 %c
+}
+
define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @sext_zext_ne(
; CHECK-NEXT: [[A:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -221,6 +304,18 @@ define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i1> %c
}
+
+define <2 x i1> @sext_zext_nneg_ne(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sext_zext_nneg_ne(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %a = sext <2 x i8> %x to <2 x i32>
+ %b = zext nneg <2 x i8> %y to <2 x i32>
+ %c = icmp ne <2 x i32> %a, %b
+ ret <2 x i1> %c
+}
+
define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_sge_op0_narrow(
; CHECK-NEXT: [[A:%.*]] = sext i5 [[X:%.*]] to i32
@@ -234,6 +329,19 @@ define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
ret i1 %c
}
+
+define i1 @sext_zext_nneg_sge_op0_narrow(i5 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_sge_op0_narrow(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i5 [[X:%.*]] to i8
+; CHECK-NEXT: [[C:%.*]] = icmp sge i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = sext i5 %x to i32
+ %b = zext nneg i8 %y to i32
+ %c = icmp sge i32 %a, %b
+ ret i1 %c
+}
+
define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
; CHECK-LABEL: @sext_zext_uge_op0_wide(
; CHECK-NEXT: [[A:%.*]] = sext i16 [[X:%.*]] to i32
@@ -247,6 +355,19 @@ define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
ret i1 %c
}
+
+define i1 @sext_zext_nneg_uge_op0_wide(i16 %x, i8 %y) {
+; CHECK-LABEL: @sext_zext_nneg_uge_op0_wide(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i16
+; CHECK-NEXT: [[C:%.*]] = icmp ule i16 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = sext i16 %x to i32
+ %b = zext nneg i8 %y to i32
+ %c = icmp uge i32 %a, %b
+ ret i1 %c
+}
+
define i1 @zext_sext_sgt_known_nonneg(i8 %x, i8 %y) {
; CHECK-LABEL: @zext_sext_sgt_known_nonneg(
; CHECK-NEXT: [[N:%.*]] = udiv i8 127, [[X:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll
new file mode 100644
index 000000000000000..fb338a6507eba86
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll
@@ -0,0 +1,111 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; See issue #55013 and PR #70845 for more details.
+; This test comes from the following C program, compiled with clang
+;
+;; short vecreduce_smin_v2i16(int n, short* v)
+;; {
+;; short p = 0;
+;; for (int i = 0; i < n; ++i)
+;; p = p > v[i] ? v[i] : p;
+;; return p;
+;; }
+;
+;; short vecreduce_smax_v2i16(int n, short* v)
+;; {
+;; short p = 0;
+;; for (int i = 0; i < n; ++i)
+;; p = p < v[i] ? v[i] : p;
+;; return p;
+;; }
+
+define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) {
+; CHECK-LABEL: define i16 @vecreduce_smin_v2i16(
+; CHECK: @llvm.smin.v2i16
+
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %conv = sext i16 %p.0 to i32
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv1 = sext i16 %0 to i32
+ %cmp2 = icmp sgt i32 %conv, %conv1
+ br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true: ; preds = %for.body
+ %idxprom4 = sext i32 %i.0 to i64
+ %arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
+ %1 = load i16, ptr %arrayidx5, align 2
+ %conv6 = sext i16 %1 to i32
+ br label %cond.end
+
+cond.false: ; preds = %for.body
+ %conv7 = sext i16 %p.0 to i32
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
+ %conv8 = trunc i32 %cond to i16
+ br label %for.inc
+
+for.inc: ; preds = %cond.end
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret i16 %p.0
+}
+
+define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) {
+; CHECK-LABEL: define i16 @vecreduce_smax_v2i16(
+; CHECK: @llvm.smax.v2i16
+
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %conv = sext i16 %p.0 to i32
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv1 = sext i16 %0 to i32
+ %cmp2 = icmp slt i32 %conv, %conv1
+ br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true: ; preds = %for.body
+ %idxprom4 = sext i32 %i.0 to i64
+ %arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
+ %1 = load i16, ptr %arrayidx5, align 2
+ %conv6 = sext i16 %1 to i32
+ br label %cond.end
+
+cond.false: ; preds = %for.body
+ %conv7 = sext i16 %p.0 to i32
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
+ %conv8 = trunc i32 %cond to i16
+ br label %for.inc
+
+for.inc: ; preds = %cond.end
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret i16 %p.0
+}
More information about the llvm-commits
mailing list