[llvm] r340777 - Revert "[SCEV][NFC] Check NoWrap flags before lexicographical comparison of SCEVs"

Mon Aug 27 14:41:37 PDT 2018

Author: rtereshin
Date: Mon Aug 27 14:41:37 2018
New Revision: 340777

URL: http://llvm.org/viewvc/llvm-project?rev=340777&view=rev
Log:
Revert "[SCEV][NFC] Check NoWrap flags before lexicographical comparison of SCEVs"

This reverts r319889.

Unfortunately, wrapping flags are not a part of SCEV's identity (they
do not participate in computing a hash value or in equality
comparisons) and in fact they could be assigned after the fact w/o
rebuilding a SCEV.

Grep for const_cast's to see quite a few of examples, apparently all
for AddRec's at the moment.

So, if 2 expressions get built in 2 slightly different ways: one with
flags set in the beginning, the other with the flags attached later
on, we may end up with 2 expressions which are exactly the same but
have their operands swapped in one of the commutative N-ary
expressions, and at least one of them will have "sorted by complexity"
invariant broken.

2 identical SCEV's won't compare equal by pointer comparison as they
are supposed to.

A real-world reproducer is added as a regression test: the issue
described causes 2 identical SCEV expressions to have different order
of operands and therefore compare not equal, which in its turn
prevents LoadStoreVectorizer from vectorizing a pair of consecutive
loads.

On a larger example (the source of the test attached, which is a
bugpoint) I have seen even weirder behavior: adding a constant to an
existing SCEV changes the order of the existing terms, for instance,
getAddExpr(1, ((A * B) + (C * D))) returns (1 + (C * D) + (A * B)).

Differential Revision: https://reviews.llvm.org/D40645

Added:
    llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll
Modified:
    llvm/trunk/lib/Analysis/ScalarEvolution.cpp
    llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll
    llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll
    llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll

Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=340777&r1=340776&r2=340777&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Mon Aug 27 14:41:37 2018
@@ -692,10 +692,6 @@ static int CompareSCEVComplexity(
     if (LNumOps != RNumOps)
       return (int)LNumOps - (int)RNumOps;
 
-    // Compare NoWrap flags.
-    if (LA->getNoWrapFlags() != RA->getNoWrapFlags())
-      return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags();
-
     // Lexicographically compare.
     for (unsigned i = 0; i != LNumOps; ++i) {
       int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
@@ -720,10 +716,6 @@ static int CompareSCEVComplexity(
     if (LNumOps != RNumOps)
       return (int)LNumOps - (int)RNumOps;
 
-    // Compare NoWrap flags.
-    if (LC->getNoWrapFlags() != RC->getNoWrapFlags())
-      return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags();
-
     for (unsigned i = 0; i != LNumOps; ++i) {
       int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
                                     LC->getOperand(i), RC->getOperand(i), DT,

Modified: llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll?rev=340777&r1=340776&r2=340777&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll (original)
+++ llvm/trunk/test/Transforms/IRCE/conjunctive-checks.ll Mon Aug 27 14:41:37 2018
@@ -5,10 +5,10 @@ define void @f_0(i32 *%arr, i32 *%a_len_
 ; CHECK-LABEL: @f_0(
 
 ; CHECK: loop.preheader:
-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]]
-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]]
+; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
+; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
+; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
 ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
 ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0

Modified: llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll?rev=340777&r1=340776&r2=340777&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll (original)
+++ llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll Mon Aug 27 14:41:37 2018
@@ -86,10 +86,10 @@ define void @single_access_no_preloop_wi
 ; CHECK-LABEL: @single_access_no_preloop_with_offset(
 
 ; CHECK: loop.preheader:
-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]]
-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]]
+; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
+; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
+; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
 ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
 ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0

Added: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll?rev=340777&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll (added)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll Mon Aug 27 14:41:37 2018
@@ -0,0 +1,76 @@
+; RUN: opt -load-store-vectorizer %s -S | FileCheck %s
+
+; Check that setting wrapping flags after a SCEV node is created
+; does not invalidate "sorted by complexity" invariant for
+; operands of commutative and associative SCEV operators.
+
+target triple = "x86_64--"
+
+ at global_value0 = external constant i32
+ at global_value1 = external constant i32
+ at other_value = external global float
+ at a = external global float
+ at b = external global float
+ at c = external global float
+ at d = external global float
+ at plus1 = external global i32
+ at cnd = external global i8
+
+; Function Attrs: nounwind
+define void @main() local_unnamed_addr #0 {
+; CHECK-LABEL: @main()
+; CHECK: [[PTR:%[0-9]+]] = bitcast float* %preheader.load0.address to <2 x float>*
+; CHECK:  = load <2 x float>, <2 x float>* [[PTR]]
+; CHECK-LABEL: for.body23:
+entry:
+  %tmp = load i32, i32* @global_value0, !range !0
+  %tmp2 = load i32, i32* @global_value1
+  %and.i.i = and i32 %tmp2, 2
+  %add.nuw.nsw.i.i = add nuw nsw i32 %and.i.i, 0
+  %mul.i.i = shl nuw nsw i32 %add.nuw.nsw.i.i, 1
+  %and6.i.i = and i32 %tmp2, 3
+  %and9.i.i = and i32 %tmp2, 4
+  %add.nuw.nsw10.i.i = add nuw nsw i32 %and6.i.i, %and9.i.i
+  %conv3.i42.i = add nuw nsw i32 %mul.i.i, 1
+  %reass.add346.7 = add nuw nsw i32 %add.nuw.nsw10.i.i, 56
+  %reass.mul347.7 = mul nuw nsw i32 %tmp, %reass.add346.7
+  %add7.i.7 = add nuw nsw i32 %reass.mul347.7, 0
+  %preheader.address0.idx = add nuw nsw i32 %add7.i.7, %mul.i.i
+  %preheader.address0.idx.zext = zext i32 %preheader.address0.idx to i64
+  %preheader.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.address0.idx.zext
+  %preheader.load0. = load float, float* %preheader.load0.address, align 4, !tbaa !1
+  %common.address.idx = add nuw nsw i32 %add7.i.7, %conv3.i42.i
+  %preheader.header.common.address.idx.zext = zext i32 %common.address.idx to i64
+  %preheader.load1.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext
+  %preheader.load1. = load float, float* %preheader.load1.address, align 4, !tbaa !1
+  br label %for.body23
+
+for.body23:                                       ; preds = %for.body23, %entry
+  %loop.header.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext
+  %loop.header.load0. = load float, float* %loop.header.load0.address, align 4, !tbaa !1
+  %reass.mul343.7 = mul nuw nsw i32 %reass.add346.7, 72
+  %add7.i286.7.7 = add nuw nsw i32 %reass.mul343.7, 56
+  %add9.i288.7.7 = add nuw nsw i32 %add7.i286.7.7, %mul.i.i
+  %loop.header.address1.idx = add nuw nsw i32 %add9.i288.7.7, 1
+  %loop.header.address1.idx.zext = zext i32 %loop.header.address1.idx to i64
+  %loop.header.load1.address = getelementptr inbounds float, float* @other_value, i64 %loop.header.address1.idx.zext
+  %loop.header.load1. = load float, float* %loop.header.load1.address, align 4, !tbaa !1
+  store float %preheader.load0., float* @a, align 4, !tbaa !1
+  store float %preheader.load1., float* @b, align 4, !tbaa !1
+  store float %loop.header.load0., float* @c, align 4, !tbaa !1
+  store float %loop.header.load1., float* @d, align 4, !tbaa !1
+  %loaded.cnd = load i8, i8* @cnd
+  %condition = trunc i8 %loaded.cnd to i1
+  br i1 %condition, label %for.body23, label %exit
+
+exit:
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{i32 0, i32 65536}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll?rev=340777&r1=340776&r2=340777&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/pr35432.ll Mon Aug 27 14:41:37 2018
@@ -40,8 +40,8 @@ define i32 @main() local_unnamed_addr #0
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[CONV3]], -1
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 -1, [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP3]]
-; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]]
+; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP3]], i32 [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[UMAX]], 2
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8
@@ -50,8 +50,8 @@ define i32 @main() local_unnamed_addr #0
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i8 [[CONV3]], -1
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i32 [[TMP12]], [[TMP3]]
-; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i32 [[TMP3]], [[TMP12]]
+; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP3]], i32 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[UMAX1]], 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8