[llvm] [InstCombine] limit icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1 to hasOneUse (PR #74318)

Liao Chunyu via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 17:30:29 PST 2024


https://github.com/ChunyuLiao updated https://github.com/llvm/llvm-project/pull/74318

>From 32d0a0883e8e786cf94dbcee65cfc9758daeef3b Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Sun, 3 Mar 2024 22:11:22 -0500
Subject: [PATCH 1/2] [IndVarSimplify] Add pre-commit tests for PR74318. NFC.

---
 .../test/Transforms/IndVarSimplify/pr30121.ll | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/pr30121.ll

diff --git a/llvm/test/Transforms/IndVarSimplify/pr30121.ll b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
new file mode 100644
index 00000000000000..601f0d3702adc7
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes='instcombine,indvars'  < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+
+define void @g(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @g(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[N:%.*]], 4
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[MUL]], i32 1)
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = shl nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+;
+entry:
+  %mul = shl nsw i32 %n, 4
+  %cmp11 = icmp sgt i32 %mul, 0
+  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.012 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext nneg i32 %i.012 to i64
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = shl nsw i32 %0, 1
+  store i32 %add, ptr %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.012, 1
+  %cmp = icmp slt i32 %inc, %mul
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}

>From 27756785f3dcb0cf08110410336269a7c710f853 Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Mon, 4 Mar 2024 20:09:11 -0500
Subject: [PATCH 2/2] [InstCombine] limit icmp sgt (shl nsw X, C1), C0 --> icmp
 sgt X, C0 >> C1 to hasOneUse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix: https://github.com/llvm/llvm-project/issues/30121

Test case IR: https://gcc.godbolt.org/z/oKTPE7v48
```
   %cmp11 = icmp sgt i32 %n, 0
   br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

   for.body.preheader:                               ; preds = %entry
   %mul = shl i32 %n, 4
   %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1)
   %wide.trip.count = zext nneg i32 %smax to i64
   br label %for.body
```

If we add the one-use limitation, the @llvm.smax call should no longer be needed:

```
  %mul = shl nsw i32 %n, 4
  %cmp11 = icmp sgt i32 %mul, 0
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

  for.body.preheader:                               ; preds = %entry
  %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1) --- this shouldn't be needed

```
  If we still need the icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1
  fold when the shl has other uses, perhaps it can be implemented in DAG combine.
---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 5 +++--
 llvm/test/Transforms/IndVarSimplify/pr30121.ll          | 7 +++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 49e597171b1c6f..d22c76b06fc5fc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2279,7 +2279,8 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
   //
   // NB: sge/sle with a constant will canonicalize to sgt/slt.
   if (Shl->hasNoSignedWrap() &&
-      (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT))
+      ((Pred == ICmpInst::ICMP_SGT && Shl->hasOneUse()) ||
+       Pred == ICmpInst::ICMP_SLT))
     if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne()))
       return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
 
@@ -2299,7 +2300,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
   // NSW guarantees that we are only shifting out sign bits from the high bits,
   // so we can ASHR the compare constant without needing a mask and eliminate
   // the shift.
-  if (Shl->hasNoSignedWrap()) {
+  if (Shl->hasNoSignedWrap() && Shl->hasOneUse()) {
     if (Pred == ICmpInst::ICMP_SGT) {
       // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
       APInt ShiftedC = C.ashr(*ShiftAmt);
diff --git a/llvm/test/Transforms/IndVarSimplify/pr30121.ll b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
index 601f0d3702adc7..c69971d46e98f4 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr30121.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
@@ -6,12 +6,11 @@ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
 define void @g(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-LABEL: @g(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[N:%.*]], 4
-; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[N:%.*]], 4
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[MUL]], 0
 ; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[MUL]], i32 1)
-; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[MUL]] to i64
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]



More information about the llvm-commits mailing list