[llvm] 1344b65 - [SCEV] Fix incorrect NUW inference (#70521)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 11:44:02 PDT 2023
Author: Daniil Suchkov
Date: 2023-10-31T11:43:57-07:00
New Revision: 1344b65c90507e2368a3d0678df9f179e8890665
URL: https://github.com/llvm/llvm-project/commit/1344b65c90507e2368a3d0678df9f179e8890665
DIFF: https://github.com/llvm/llvm-project/commit/1344b65c90507e2368a3d0678df9f179e8890665.diff
LOG: [SCEV] Fix incorrect NUW inference (#70521)
This patch fixes a miscompile in LSR caused by incorrect inference of the
NUW flag for an AddRec: we shouldn't infer no-wrap flags from a
comparison that doesn't fully control the loop exit.
Added:
Modified:
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f13e508a6c454bd..de24aa986688a1e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12640,6 +12640,11 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
if (AR && AR->getLoop() == L && AR->isAffine()) {
auto canProveNUW = [&]() {
+ // We can use the comparison to infer no-wrap flags only if it fully
+ // controls the loop exit.
+ if (!ControlsOnlyExit)
+ return false;
+
if (!isLoopInvariant(RHS, L))
return false;
diff --git a/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll b/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
index 3a9cb160e88d7bc..1b37b4cd2a2d8b9 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
@@ -5,38 +5,33 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
-; FIXME: the returned value should be equal to
-; zext (trunk (%phi-1) to i16) to i64
-; or simply
+; The returned value should be equal to
; zext (%phi-1) to i64
-; which means it should be equal to 1209. Currently, due to a bug in SCEV, it's
-; over 65534.
+; or simply 1209.
define noundef i64 @test() {
; CHECK-LABEL: define noundef i64 @test() {
; CHECK-NEXT: bb2:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB10:%.*]] ], [ 0, [[BB2:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], 65535
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV]], 65407
-; CHECK-NEXT: [[ICMP5:%.*]] = icmp ult i64 [[TMP1]], -256
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[ICMP6:%.*]] = icmp ult i32 [[TMP2]], 128
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB10:%.*]] ], [ -1, [[BB2:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LSR_IV]], 65536
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP0]], 65535
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[AND]] to i64
+; CHECK-NEXT: [[ADD4:%.*]] = add nsw i64 [[ZEXT]], -128
+; CHECK-NEXT: [[ICMP5:%.*]] = icmp ult i64 [[ADD4]], -256
+; CHECK-NEXT: [[ICMP6:%.*]] = icmp ult i32 [[AND]], 128
; CHECK-NEXT: [[OR:%.*]] = or i1 [[ICMP5]], [[ICMP6]]
; CHECK-NEXT: br i1 [[OR]], label [[BB10]], label [[BB7:%.*]]
; CHECK: bb7:
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[LSR_IV]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[LSR_IV]], 1
; CHECK-NEXT: call void @foo(i32 [[TMP1]])
; CHECK-NEXT: unreachable
; CHECK: bb10:
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_NEXT]], -1
-; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT: [[ICMP12:%.*]] = icmp ult i32 [[TMP]], 1210
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT: [[ICMP12:%.*]] = icmp ult i32 [[LSR_IV_NEXT]], 1210
; CHECK-NEXT: br i1 [[ICMP12]], label [[BB3]], label [[BB13:%.*]]
; CHECK: bb13:
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[LSR_IV_NEXT]], 65534
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: ret i64 [[ZEXT]]
;
bb2:
br label %bb3
More information about the llvm-commits mailing list