[llvm] 1344b65 - [SCEV] Fix incorrect NUW inference (#70521)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 31 11:44:02 PDT 2023


Author: Daniil Suchkov
Date: 2023-10-31T11:43:57-07:00
New Revision: 1344b65c90507e2368a3d0678df9f179e8890665

URL: https://github.com/llvm/llvm-project/commit/1344b65c90507e2368a3d0678df9f179e8890665
DIFF: https://github.com/llvm/llvm-project/commit/1344b65c90507e2368a3d0678df9f179e8890665.diff

LOG: [SCEV] Fix incorrect NUW inference (#70521)

This patch fixes a miscompile in LSR caused by incorrect inference of
NUW flag for AddRec: we shouldn't infer no-wrap flags based on a
comparison which doesn't fully control the loop exit.

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f13e508a6c454bd..de24aa986688a1e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12640,6 +12640,11 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
       if (AR && AR->getLoop() == L && AR->isAffine()) {
         auto canProveNUW = [&]() {
+          // We can use the comparison to infer no-wrap flags only if it fully
+          // controls the loop exit.
+          if (!ControlsOnlyExit)
+            return false;
+
           if (!isLoopInvariant(RHS, L))
             return false;
 

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll b/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
index 3a9cb160e88d7bc..1b37b4cd2a2d8b9 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll
@@ -5,38 +5,33 @@
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
 target triple = "x86_64-unknown-linux-gnu"
 
-; FIXME: the returned value should be equal to
-;     zext (trunk (%phi-1) to i16) to i64
-;   or simply
+; The returned value should be equal to
 ;     zext (%phi-1) to i64
-; which means it should be equal to 1209. Currently, due to a bug in SCEV, it's
-; over 65534.
+; or simply 1209.
 define noundef i64 @test() {
 ; CHECK-LABEL: define noundef i64 @test() {
 ; CHECK-NEXT:  bb2:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB10:%.*]] ], [ 0, [[BB2:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV]], 65535
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[LSR_IV]], 65407
-; CHECK-NEXT:    [[ICMP5:%.*]] = icmp ult i64 [[TMP1]], -256
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[ICMP6:%.*]] = icmp ult i32 [[TMP2]], 128
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB10:%.*]] ], [ -1, [[BB2:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LSR_IV]], 65536
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], 65535
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[AND]] to i64
+; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i64 [[ZEXT]], -128
+; CHECK-NEXT:    [[ICMP5:%.*]] = icmp ult i64 [[ADD4]], -256
+; CHECK-NEXT:    [[ICMP6:%.*]] = icmp ult i32 [[AND]], 128
 ; CHECK-NEXT:    [[OR:%.*]] = or i1 [[ICMP5]], [[ICMP6]]
 ; CHECK-NEXT:    br i1 [[OR]], label [[BB10]], label [[BB7:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[LSR_IV]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[LSR_IV]], 1
 ; CHECK-NEXT:    call void @foo(i32 [[TMP1]])
 ; CHECK-NEXT:    unreachable
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[LSR_IV_NEXT]], -1
-; CHECK-NEXT:    [[TMP:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT:    [[ICMP12:%.*]] = icmp ult i32 [[TMP]], 1210
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT:    [[ICMP12:%.*]] = icmp ult i32 [[LSR_IV_NEXT]], 1210
 ; CHECK-NEXT:    br i1 [[ICMP12]], label [[BB3]], label [[BB13:%.*]]
 ; CHECK:       bb13:
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[LSR_IV_NEXT]], 65534
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[ZEXT]]
 ;
 bb2:
   br label %bb3


        


More information about the llvm-commits mailing list