[llvm] [IndVarSimplify] Do not replace ExitValue if it contains Udiv with non-constant divisor (PR #109945)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 02:58:19 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (ShivaChen)
<details>
<summary>Changes</summary>
Udiv brings a high structural dependency and decreases performance. This patch adds a containNonConstantDivisorUDiv check to avoid the regression.
---
Full diff: https://github.com/llvm/llvm-project/pull/109945.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Utils/LoopUtils.cpp (+17)
- (modified) llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll (+6-23)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 9a4289e1a30da0..ef57f2a4ee07ab 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1526,6 +1526,16 @@ static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
}
+// Return true if S contains (A udiv B) and B is not a constant.
+static bool containNonConstantDivisorUDiv(const SCEV *S) {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (auto *Udiv = dyn_cast<SCEVUDivExpr>(S)) {
+ return !isa<SCEVConstant>(Udiv->getRHS());
+ }
+ return false;
+ });
+}
+
int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
ScalarEvolution *SE,
const TargetTransformInfo *TTI,
@@ -1643,6 +1653,13 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
continue;
}
+ // Do not replace the use with ExitValue if it contains a udiv with a
+ // non-constant divisor. A udiv with a non-constant divisor is hard for
+ // later optimization passes to optimize out, so it would eventually be
+ // emitted as an actual udiv.
+ if (containNonConstantDivisorUDiv(ExitValue))
+ continue;
+
// Computing the value outside of the loop brings no benefit if it is
// definitely used inside the loop in a way which can not be optimized
// away. Avoid doing so unless we know we have a value which computes
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
index 1956f454a52bbf..d07d5e60d7dc5d 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
@@ -164,20 +164,15 @@ define i16 @pr57336(i16 %end, i16 %m) mustprogress {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INC8:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INC137:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[INC137]], 1
; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[INC8]], 1
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[M:%.*]], [[INC8]]
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp slt i16 [[END:%.*]], [[MUL]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK: crit_edge:
-; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[END]], 1
-; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP0]], i16 0)
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i16 [[END]], 32767
-; CHECK-NEXT: [[UMIN:%.*]] = zext i1 [[TMP1]] to i16
-; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i16 [[SMAX]], [[UMIN]]
-; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[M]], i16 1)
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i16 [[TMP2]], [[UMAX]]
-; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP3]], [[UMIN]]
-; CHECK-NEXT: ret i16 [[TMP4]]
+; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[INC137]] to i16
+; CHECK-NEXT: ret i16 [[CONV]]
;
entry:
br label %for.body
@@ -217,13 +212,7 @@ define i32 @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscal
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
-; CHECK-NEXT: ret i32 [[UMIN]]
+; CHECK-NEXT: ret i32 [[VF_CAPPED]]
;
entry:
%vscale = call i32 @llvm.vscale.i32()
@@ -269,13 +258,7 @@ define i32 @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vsca
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
-; CHECK-NEXT: ret i32 [[UMIN]]
+; CHECK-NEXT: ret i32 [[VF_CAPPED]]
;
entry:
%vscale = call i32 @llvm.vscale.i32()
``````````
</details>
https://github.com/llvm/llvm-project/pull/109945
More information about the llvm-commits
mailing list