[llvm] 7861cb6 - [NARY] Don't optimize min/max if there are side uses (part2)
Evgeniy Brevnov via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 30 05:02:23 PDT 2021
Author: Evgeniy Brevnov
Date: 2021-04-30T19:02:02+07:00
New Revision: 7861cb600cd6f1905df3d1055ea910a07e2c3c4f
URL: https://github.com/llvm/llvm-project/commit/7861cb600cd6f1905df3d1055ea910a07e2c3c4f
DIFF: https://github.com/llvm/llvm-project/commit/7861cb600cd6f1905df3d1055ea910a07e2c3c4f.diff
LOG: [NARY] Don't optimize min/max if there are side uses (part2)
A previous attempt to fix infinite recursion in min/max reassociation (D100170) was not fully successful. The newly discovered failing case arises because the situation where there is a single use is not handled properly; it should be processed separately from the two-uses case.
Reviewed By: mkazantsev
Differential Revision: https://reviews.llvm.org/D101359
Added:
Modified:
llvm/lib/Transforms/Scalar/NaryReassociate.cpp
llvm/test/Transforms/NaryReassociate/nary-req.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 2284142c8aa10..ded5caf53b5a9 100644
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -585,6 +585,11 @@ template <typename MaxMinT> static SCEVTypes convertToSCEVype(MaxMinT &MM) {
return scUnknown;
}
+// Parameters:
+// I - instruction matched by MaxMinMatch matcher
+// MaxMinMatch - min/max idiom matcher
+// LHS - first operand of I
+// RHS - second operand of I
template <typename MaxMinT>
Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
MaxMinT MaxMinMatch,
@@ -612,9 +617,10 @@ Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
// The optimization is profitable only if LHS can be removed in the end.
// In other words LHS should be used (directly or indirectly) by I only.
- for (User *U : LHS->users())
- if (U != I || !(U->hasOneUser() && *U->users().begin() == I))
- continue;
+ if (llvm::any_of(LHS->users(), [&](auto *U) {
+ return U != I && !(U->hasOneUser() && *U->users().begin() == I);
+ }))
+ continue;
SCEVExpander Expander(*SE, *DL, "nary-reassociate");
SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr };
diff --git a/llvm/test/Transforms/NaryReassociate/nary-req.ll b/llvm/test/Transforms/NaryReassociate/nary-req.ll
index 020bae84aee96..5c4efc8e0da08 100644
--- a/llvm/test/Transforms/NaryReassociate/nary-req.ll
+++ b/llvm/test/Transforms/NaryReassociate/nary-req.ll
@@ -3,9 +3,10 @@
; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
declare i32 @llvm.smax.i32(i32 %a, i32 %b)
+declare i64 @llvm.umin.i64(i64, i64)
; This is a negative test. We should not optimize if intermediate result
-; has a use outside of optimizaple pattern. In other words %smax2 has one
+; has a use outside of optimizable pattern. In other words %smax2 has one
; use from %smax3 and side use from %res2.
define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @smax_test1(
@@ -30,3 +31,36 @@ define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
ret i32 %res
}
+; This is a negative test. It is similar to the previous one
+; but a bit more complex. In particular after first iteration
+; e10 is replaced with %e10.nary = call i64 @llvm.umin.i64(i64 %e5, i64 %e).
+; No more reassociation should be applied to %e10.nary since
+; %e5 has side use in %e6.
+define void @test2(i64 %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[E:%.*]] = sub i64 undef, 0
+; CHECK-NEXT: [[E1:%.*]] = sub i64 [[ARG:%.*]], 0
+; CHECK-NEXT: [[E2:%.*]] = call i64 @llvm.umin.i64(i64 [[E]], i64 [[E1]])
+; CHECK-NEXT: [[E3:%.*]] = call i64 @llvm.umin.i64(i64 [[E2]], i64 16384)
+; CHECK-NEXT: [[E4:%.*]] = sub i64 [[ARG]], 0
+; CHECK-NEXT: [[E5:%.*]] = call i64 @llvm.umin.i64(i64 [[E4]], i64 16384)
+; CHECK-NEXT: [[E6:%.*]] = icmp ugt i64 [[E5]], 0
+; CHECK-NEXT: [[E10_NARY:%.*]] = call i64 @llvm.umin.i64(i64 [[E5]], i64 [[E]])
+; CHECK-NEXT: unreachable
+;
+bb:
+ %e = sub i64 undef, 0
+ %e1 = sub i64 %arg, 0
+ %e2 = call i64 @llvm.umin.i64(i64 %e, i64 %e1)
+ %e3 = call i64 @llvm.umin.i64(i64 %e2, i64 16384)
+ %e4 = sub i64 %arg, 0
+ %e5 = call i64 @llvm.umin.i64(i64 %e4, i64 16384)
+ %e6 = icmp ugt i64 %e5, 0
+ %e7 = sub i64 undef, 0
+ %e8 = sub i64 %arg, 0
+ %e9 = call i64 @llvm.umin.i64(i64 %e7, i64 %e8)
+ %e10 = call i64 @llvm.umin.i64(i64 %e9, i64 16384)
+ unreachable
+}
+
More information about the llvm-commits
mailing list