[llvm] [InstCombine] limit icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1 to hasOneUse (PR #74318)

Mon Dec 4 05:30:28 PST 2023

https://github.com/ChunyuLiao created https://github.com/llvm/llvm-project/pull/74318

Fix: https://github.com/llvm/llvm-project/issues/30121

Testcase ir:  https://gcc.godbolt.org/z/oKTPE7v48
```
  %cmp11 = icmp sgt i32 %n, 0
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %mul = shl i32 %n, 4
  %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1)
  %wide.trip.count = zext nneg i32 %smax to i64
  br label %for.body
```

If add the limitations, @llvm.smax shouldn't be needed.

```
  %mul = shl nsw i32 %n, 4
  %cmp11 = icmp sgt i32 %mul, 0
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1) --- this shouldn't be needed

```
If we need icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1， perhaps we can implement this at DAG combine.

>From 2bdf1a8432577d9f6706a08e1d3fb456d10669ac Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Mon, 4 Dec 2023 20:03:21 +0800
Subject: [PATCH] [InstCombine] limit icmp sgt (shl nsw X, C1), C0 --> icmp sgt
 X, C0 >> C1 to hasOneUse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix: https://github.com/llvm/llvm-project/issues/30121

Testcase ir:  https://gcc.godbolt.org/z/oKTPE7v48
```
  %cmp11 = icmp sgt i32 %n, 0
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %mul = shl i32 %n, 4
  %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1)
  %wide.trip.count = zext nneg i32 %smax to i64
  br label %for.body
```

If add the limitations, @llvm.smax shouldn't be needed.

```
  %mul = shl nsw i32 %n, 4
  %cmp11 = icmp sgt i32 %mul, 0
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1) --- this shouldn't be needed

```
If we need icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1，
perhaps we can implement this at DAG combine.
---
 .../InstCombine/InstCombineCompares.cpp       |  5 +-
 .../test/Transforms/IndVarSimplify/pr30121.ll | 53 +++++++++++++++++++
 2 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/pr30121.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1d09d9b44a9e5..5dafd0dcd5ef5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2231,7 +2231,8 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
   //
   // NB: sge/sle with a constant will canonicalize to sgt/slt.
   if (Shl->hasNoSignedWrap() &&
-      (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT))
+      ((Pred == ICmpInst::ICMP_SGT && Shl->hasOneUse()) ||
+       Pred == ICmpInst::ICMP_SLT))
     if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne()))
       return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
 
@@ -2251,7 +2252,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
   // NSW guarantees that we are only shifting out sign bits from the high bits,
   // so we can ASHR the compare constant without needing a mask and eliminate
   // the shift.
-  if (Shl->hasNoSignedWrap()) {
+  if (Shl->hasNoSignedWrap() && Shl->hasOneUse()) {
     if (Pred == ICmpInst::ICMP_SGT) {
       // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
       APInt ShiftedC = C.ashr(*ShiftAmt);
diff --git a/llvm/test/Transforms/IndVarSimplify/pr30121.ll b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
new file mode 100644
index 0000000000000..c69971d46e98f
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes='instcombine,indvars'  < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+
+define void @g(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @g(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[N:%.*]], 4
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[MUL]], 0
+; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[MUL]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = shl nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+;
+entry:
+  %mul = shl nsw i32 %n, 4
+  %cmp11 = icmp sgt i32 %mul, 0
+  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.012 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext nneg i32 %i.012 to i64
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = shl nsw i32 %0, 1
+  store i32 %add, ptr %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.012, 1
+  %cmp = icmp slt i32 %inc, %mul
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}