[llvm] 7f1bf68 - [SCEVExpander] Only check overflow if it is needed.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 9 05:04:43 PST 2022


Author: Florian Hahn
Date: 2022-01-09T12:55:41Z
New Revision: 7f1bf68d7dd6e4d7d64e7b6b30055d62db3bbe32

URL: https://github.com/llvm/llvm-project/commit/7f1bf68d7dd6e4d7d64e7b6b30055d62db3bbe32
DIFF: https://github.com/llvm/llvm-project/commit/7f1bf68d7dd6e4d7d64e7b6b30055d62db3bbe32.diff

LOG: [SCEVExpander] Only check overflow if it is needed.

9345ab3a4550 updated generateOverflowCheck to skip creating checks that
always evaluate to false. This in turn means that we only need to check
for overflows if the result of the multiplication is actually used.

Sink the Or for the overflow check into ComputeEndCheck, so it is only
created when there's an actual check.

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
    llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
    llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
    llvm/test/Transforms/LoopVectorize/induction.ll
    llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
    llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
    llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index b7498479f4d00..3b1e160a85678 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2553,7 +2553,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
       // Select the answer based on the sign of Step.
       EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
     }
-    return EndCheck;
+    return Builder.CreateOr(EndCheck, OfMul);
   };
   Value *EndCheck = ComputeEndCheck();
 
@@ -2571,7 +2571,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
     EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
   }
 
-  return Builder.CreateOr(EndCheck, OfMul);
+  return EndCheck;
 }
 
 Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,

diff  --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
index ce4cccb880313..9f3ebcc9a9afc 100644
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -19,7 +19,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 false, [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -27,7 +26,7 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -159,7 +158,6 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 false, [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -167,7 +165,7 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
 ; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
index 7b044dc66701b..38777e9ae79cd 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
@@ -543,9 +543,9 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
 ; VF-TWO-CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
 ; VF-TWO-CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
 ; VF-TWO-CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
+; VF-TWO-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; VF-TWO-CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-TWO-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
-; VF-TWO-CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
+; VF-TWO-CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; VF-TWO-CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; VF-TWO-CHECK:       vector.main.loop.iter.check:
 ; VF-TWO-CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
@@ -770,9 +770,9 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
 ; VF-FOUR-CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
 ; VF-FOUR-CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
+; VF-FOUR-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-FOUR-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
-; VF-FOUR-CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
+; VF-FOUR-CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; VF-FOUR-CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; VF-FOUR-CHECK:       vector.main.loop.iter.check:
 ; VF-FOUR-CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
index 61d703a71eb85..4bd408ccd4036 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -54,9 +54,9 @@ define i32 @main() local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP16]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 8

diff  --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index a128b44da0d6c..a3060c2a93a5f 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -6522,11 +6522,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = sext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]]
@@ -6600,11 +6600,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; IND-NEXT:    [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
 ; IND-NEXT:    [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
 ; IND-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
+; IND-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; IND-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; IND-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
 ; IND-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; IND-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; IND-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]]
+; IND-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[TMP11]]
 ; IND-NEXT:    [[TMP14:%.*]] = add i32 [[STEP]], -128
 ; IND-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256
 ; IND-NEXT:    [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]]
@@ -6673,11 +6673,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; UNROLL-NEXT:    [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
 ; UNROLL-NEXT:    [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
 ; UNROLL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
+; UNROLL-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; UNROLL-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
 ; UNROLL-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; UNROLL-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; UNROLL-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]]
+; UNROLL-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[TMP11]]
 ; UNROLL-NEXT:    [[TMP14:%.*]] = add i32 [[STEP]], -128
 ; UNROLL-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256
 ; UNROLL-NEXT:    [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]]
@@ -6753,11 +6753,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW]]
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[TMP13]]
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sext i8 [[TMP1]] to i32
 ; UNROLL-NO-IC-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]]
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]]
@@ -6838,11 +6838,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
 ; INTERLEAVE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
+; INTERLEAVE-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; INTERLEAVE-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
 ; INTERLEAVE-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; INTERLEAVE-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; INTERLEAVE-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]]
+; INTERLEAVE-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[TMP11]]
 ; INTERLEAVE-NEXT:    [[TMP14:%.*]] = add i32 [[STEP]], -128
 ; INTERLEAVE-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256
 ; INTERLEAVE-NEXT:    [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]]

diff  --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 2a49b56472e17..47ca25a3a89e3 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -173,9 +173,9 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4

diff  --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
index 814c825d8c81d..95437757ebcd4 100644
--- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -56,11 +56,11 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = sext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]]
@@ -182,11 +182,11 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr  {
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i8 [[TMP6]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp ugt i8 [[TMP7]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = sext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]]
@@ -382,11 +382,11 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[TMP5]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP8]], i1 [[TMP7]]
+; CHECK-NEXT:    [[TMP13:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4

diff  --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
index 87d9a9620d4d5..7ce134e66cf8a 100644
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -36,7 +36,6 @@ define void @f1(i16* noalias %a,
 ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
 ; LV-NEXT:    [[TMP8:%.*]] = or i1 false, [[TMP7]]
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
 ; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -44,7 +43,7 @@ define void @f1(i16* noalias %a,
 ; LV-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
 ; LV-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
 ; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
+; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
 ; LV-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -154,9 +153,9 @@ define void @f2(i16* noalias %a,
 ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
 ; LV-NEXT:    [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
-; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
+; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; LV-NEXT:    [[TMP12:%.*]] = trunc i64 [[N]] to i31
 ; LV-NEXT:    [[TMP13:%.*]] = zext i31 [[TMP12]] to i64
 ; LV-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1
@@ -263,9 +262,9 @@ define void @f3(i16* noalias %a,
 ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; LV-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
 ; LV-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
+; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[TMP7]]
 ; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
 ; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -359,9 +358,9 @@ define void @f4(i16* noalias %a,
 ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
 ; LV-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
-; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
+; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
 ; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
 ; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
@@ -466,9 +465,9 @@ define void @f5(i16* noalias %a,
 ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
 ; LV-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
-; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
+; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
 ; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
 ; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])


        


More information about the llvm-commits mailing list