[llvm] [SCEVExpander] Support hoisting udiv X, Y where Y is non-zero (PR #96102)

Wed Jun 19 11:56:48 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Philip Reames (preames)

<details>
<summary>Changes</summary>

We already had the constant check, but there are cases where we can prove that non-constant SCEVs are non-zero as well.  Note that SCEVExpander has multiple independent mechanisms for reasoning about hoistability, and that this particular one ends up not really influencing the placement of the udiv itself, but instead mostly the placement of the using instructions generated from a SCEV formula which includes a udiv as an inner node.

My motivating example is 4 x vscale, which shows up in scalably vectorized loops.  Note that simple examples are caught by sinking logic after expansion by indvars, so the vscale case doesn't really show up in the diff.

---
Full diff: https://github.com/llvm/llvm-project/pull/96102.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp (+4-5) 
- (modified) llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll (+6-10) 
- (modified) llvm/test/Transforms/LoopVectorize/pr38697.ll (+3-4) 
- (modified) llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll (+11-8) 


``````````diff

diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index c7d758aa575e6..68c0132201692 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1470,12 +1470,11 @@ Value *SCEVExpander::expand(const SCEV *S) {
 
   // We can move insertion point only if there is no div or rem operations
   // otherwise we are risky to move it over the check for zero denominator.
-  auto SafeToHoist = [](const SCEV *S) {
-    return !SCEVExprContains(S, [](const SCEV *S) {
+  auto SafeToHoist = [&](const SCEV *S) {
+    return !SCEVExprContains(S, [&](const SCEV *S) {
               if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
-                if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
-                  // Division by non-zero constants can be hoisted.
-                  return SC->getValue()->isZero();
+                if (SE.isKnownNonZero(D->getRHS()))
+                  return false;
                 // All other divisions should not be moved as they may be
                 // divisions by zero and should be kept within the
                 // conditions of the surrounding loops that guard their
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
index 653970cc34022..e4b2b38d1c61d 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
@@ -163,18 +163,14 @@ define i16 @pr57336(i16 %end, i16 %m) mustprogress {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INC8:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[INC8]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i16 [[INC8]], [[M:%.*]]
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i16 [[MUL]], [[END:%.*]]
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[CRIT_EDGE:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 true, label [[CRIT_EDGE:%.*]], label [[FOR_BODY]]
 ; CHECK:       crit_edge:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[END]], 1
-; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP0]], i16 0)
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i16 [[END]], 32767
-; CHECK-NEXT:    [[UMIN:%.*]] = zext i1 [[TMP1]] to i16
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i16 [[END:%.*]], 32767
+; CHECK-NEXT:    [[UMIN:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[END]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i16 [[SMAX]], [[UMIN]]
-; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[M]], i16 1)
+; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[M:%.*]], i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = udiv i16 [[TMP2]], [[UMAX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[TMP3]], [[UMIN]]
 ; CHECK-NEXT:    ret i16 [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr38697.ll b/llvm/test/Transforms/LoopVectorize/pr38697.ll
index dd2d6fcab7114..270e572f252a4 100644
--- a/llvm/test/Transforms/LoopVectorize/pr38697.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr38697.ll
@@ -113,13 +113,12 @@ exit:                                             ; preds = %loop1.inc, %entry
 ; guaranted that 'counter1' is always non-zero.  So it is safe to hoist the
 ; division from the inner loop to the preheader.
 ;
-; Verify that the 'udiv' is hoisted to the preheader, and is not in the loop body.
+; Verify that the 'udiv' is hoisted to the outer loop's preheader, and is
+; not in the inner loop body.
 define i32 @NonZeroDivHoist(ptr nocapture readonly %ptr, i32 %start1, i32 %start2) {
 ; INDVARCHECK-LABEL: @NonZeroDivHoist(
 ; INDVARCHECK-NEXT:  entry:
-; INDVARCHECK:       for.body3.lr.ph:
-; INDVARCHECK-NEXT:    [[TMP0:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]]
-; INDVARCHECK-NEXT:    br label [[FOR_BODY3:%.*]]
+; INDVARCHECK:         udiv
 ; INDVARCHECK:       for.body3:
 ; INDVARCHECK-NOT:     udiv
 ; INDVARCHECK:       for.end10:
diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
index f0220f5e766b2..43dbb060c75ec 100644
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -11,26 +11,29 @@ define void @test(i16 %x, i64 %y, ptr %ptr) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV19:%.*]] = sext i16 [[X:%.*]] to i64
 ; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[CONV19]], 492802768830814067
+; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 [[Y:%.*]], [[ADD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:    br label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       loop.preheader:
-; CHECK-NEXT:    [[DIV:%.*]] = udiv i64 [[Y:%.*]], [[ADD]]
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i64 [[Y]], [[ADD]]
 ; CHECK-NEXT:    [[INC:%.*]] = add i64 [[DIV]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[DIV]], 4
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv i64 [[TMP0]], [[INC]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP2]], 1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP4]], 1
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], [[INC]]
+; CHECK-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP5]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    store i32 0, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:

``````````

</details>


https://github.com/llvm/llvm-project/pull/96102