[llvm] [indvars] Support known positive extends in getExtendedOperandRecurrence (PR #70990)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 14:40:41 PDT 2023
https://github.com/preames created https://github.com/llvm/llvm-project/pull/70990
IndVars has the existing notion of a narrow definition which is known to positive and thus both sign and zero extension kinds are actually the same operations. There's existing logic for forming a SCEV based on the extension kind and the no-wrap flags. This change extends that logic to form the opposite extension kind for a positive def if doing so is allowed by the flags. Note that we already do something analogous for the getWideRecurrence case as well.
>From 108b56c66e748721200626cd412b700cac484ff5 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 1 Nov 2023 11:42:12 -0700
Subject: [PATCH] [indvars] Support known positive extends in
getExtendedOperandRecurrence
IndVars has the existing notion of a narrow definition which is known to
positive and thus both sign and zero extension kinds are actually the
same operations. There's existing logic for forming a SCEV based on the
extension kind and the no-wrap flags. This change extends that logic
to form the opposite extension kind for a positive def if doing so is
allowed by the flags. Note that we already do something analogous
for the getWideRecurrence case as well.
---
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 17 +++++++++++-
.../promote-iv-to-eliminate-casts.ll | 6 ++---
llvm/test/Transforms/LoopFlatten/widen-iv.ll | 26 +++++++------------
3 files changed, 29 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index ae3644183a735bc..a3763c4068842f4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1384,7 +1384,22 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else
+ else if (DU.NeverNegative) {
+ // For a non-negative NarrowDef, we can choose either type of
+ // extension. We want to use the current extend kind if legal
+ // (see above), and we only hit this code if we need to check
+ // the opposite case.
+ if (OBO->hasNoSignedWrap()) {
+ ExtKind = ExtendKind::Sign;
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ } else if (OBO->hasNoUnsignedWrap()) {
+ ExtKind = ExtendKind::Zero;
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ } else
+ return {nullptr, ExtendKind::Unknown};
+ } else
return {nullptr, ExtendKind::Unknown};
// When creating this SCEV expr, don't apply the current operations NSW or NUW
diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index d912540c95657f3..60e014b0efca53a 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -196,8 +196,8 @@ define void @promote_latch_condition_decrementing_loop_01(ptr %p, ptr %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store atomic i32 0, ptr [[EL]] unordered, align 4
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
;
@@ -336,8 +336,8 @@ define void @promote_latch_condition_decrementing_loop_04(ptr %p, ptr %a, i1 %co
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store atomic i32 0, ptr [[EL]] unordered, align 4
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
;
@@ -398,8 +398,8 @@ define void @promote_latch_condition_decrementing_loop_05(ptr %p, ptr %a, i1 %co
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store atomic i32 0, ptr [[EL]] unordered, align 4
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
;
diff --git a/llvm/test/Transforms/LoopFlatten/widen-iv.ll b/llvm/test/Transforms/LoopFlatten/widen-iv.ll
index 4692fa829bac5c7..4b46dcde3b04d2e 100644
--- a/llvm/test/Transforms/LoopFlatten/widen-iv.ll
+++ b/llvm/test/Transforms/LoopFlatten/widen-iv.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),dce,verify' -loop-flatten-widen-iv=true \
+; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),adce,verify' -loop-flatten-widen-iv=true \
; RUN: -verify-loop-info -verify-dom-info -verify-scev \
; RUN: -loop-flatten-cost-threshold=6 | \
; RUN: FileCheck %s --check-prefix=CHECK
-; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),dce,verify' -loop-flatten-widen-iv=false \
+; RUN: opt < %s -S -passes='loop-simplify,loop(loop-flatten),adce,verify' -loop-flatten-widen-iv=false \
; RUN: -verify-loop-info -verify-dom-info -verify-scev | \
; RUN: FileCheck %s --check-prefix=DONTWIDEN
@@ -29,17 +29,15 @@ define void @foo(ptr %A, i32 %N, i32 %M) {
; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK: for.cond1.preheader.us:
-; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32
+; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]
; CHECK: for.body4.us:
-; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64
-; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
+; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR2]]
; CHECK-NEXT: tail call void @f(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:
-; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1
-; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]]
+; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
+; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
@@ -138,7 +136,6 @@ define void @foo2_sext(i32* nocapture readonly %A, i32 %N, i32 %M) {
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK: for.cond1.preheader.us:
; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; CHECK-NEXT: [[I_018_US:%.*]] = phi i32 [ [[INC6_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]
; CHECK: for.body4.us:
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR2]]
@@ -147,7 +144,6 @@ define void @foo2_sext(i32* nocapture readonly %A, i32 %N, i32 %M) {
; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:
; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
-; CHECK-NEXT: [[INC6_US]] = add nuw nsw i32 [[I_018_US]], 1
; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.cond1.preheader:
@@ -1002,17 +998,15 @@ define void @foo_M_sext(ptr %A, i32 %N, i16 %M) {
; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK: for.cond1.preheader.us:
-; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32
+; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]
; CHECK: for.body4.us:
-; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64
-; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
+; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR2]]
; CHECK-NEXT: tail call void @f(ptr [[ARRAYIDX_US]])
; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:
-; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1
-; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]]
+; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
+; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
More information about the llvm-commits
mailing list