[llvm-branch-commits] [llvm] 95227e4 - Wrap
Florian Hahn via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 11 06:30:49 PDT 2021
Author: Florian Hahn
Date: 2021-06-11T14:06:38+01:00
New Revision: 95227e4faaa5640cc4c0601124f596ce027effce
URL: https://github.com/llvm/llvm-project/commit/95227e4faaa5640cc4c0601124f596ce027effce
DIFF: https://github.com/llvm/llvm-project/commit/95227e4faaa5640cc4c0601124f596ce027effce.diff
LOG: Wrap
Added:
Modified:
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7484ce67c2f2..f6c727f743fd 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10051,7 +10051,32 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
};
+ auto MatchBinaryAddToConst2 = [this](const SCEV *X, const SCEV *Y,
+ APInt &OutC1, APInt &OutC2,
+ SCEV::NoWrapFlags ExpectedFlags) {
+ const SCEV *XNonConstOp, *XConstOp;
+ const SCEV *YNonConstOp, *YConstOp;
+ SCEV::NoWrapFlags XFlagsPresent;
+ SCEV::NoWrapFlags YFlagsPresent;
+
+ if (!splitBinaryAdd(X, XConstOp, XNonConstOp, XFlagsPresent) ||
+ !isa<SCEVConstant>(XConstOp))
+ return false;
+
+ if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent) ||
+ !isa<SCEVConstant>(YConstOp))
+ return false;
+
+ if (XFlagsPresent != YFlagsPresent || YNonConstOp != XNonConstOp)
+ return false;
+
+ OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
+ OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
+ return (XFlagsPresent & ExpectedFlags) == ExpectedFlags;
+ };
+
APInt C;
+ APInt C2;
switch (Pred) {
default:
@@ -10069,6 +10094,10 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
!C.isStrictlyPositive())
return true;
+
+ if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.sle(C2))
+ return true;
+
break;
case ICmpInst::ICMP_SGT:
@@ -10083,6 +10112,10 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
// (X + C)<nsw> s< X if C < 0
if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
return true;
+
+ if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.slt(C2))
+ return true;
+
break;
case ICmpInst::ICMP_UGE:
@@ -10092,6 +10125,10 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
// X u<= (X + C)<nuw> for any C
if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW))
return true;
+
+ if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.ule(C2))
+ return true;
+
break;
case ICmpInst::ICMP_UGT:
@@ -10101,6 +10138,9 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
// X u< (X + C)<nuw> if C != 0
if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW) && !C.isNullValue())
return true;
+
+ if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.ult(C2))
+ return true;
break;
}
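
A minimal standalone sketch (illustrative only, not part of the commit) of the reasoning the new MatchBinaryAddToConst2 lambda enables: when both sides of the predicate split as (X + C1)<nuw> and (X + C2)<nuw> over the same X, the unsigned comparison of the two sums reduces to the unsigned comparison of the constants. The helper name sumsCompareULT below is hypothetical; only llvm::APInt from LLVM's ADT headers is assumed.

#include "llvm/ADT/APInt.h"
#include <cassert>

using llvm::APInt;

// Returns (X + C1) u< (X + C2), asserting the <nuw> precondition that
// neither addition wraps. This mirrors the ICMP_ULT case above, which
// returns true when C.ult(C2).
static bool sumsCompareULT(const APInt &X, const APInt &C1, const APInt &C2) {
  bool Ov1 = false, Ov2 = false;
  APInt S1 = X.uadd_ov(C1, Ov1); // X + C1, tracking unsigned overflow
  APInt S2 = X.uadd_ov(C2, Ov2); // X + C2, tracking unsigned overflow
  assert(!Ov1 && !Ov2 && "precondition: both adds carry <nuw>");
  return S1.ult(S2);
}

int main() {
  // 8-bit example: X = 200, C1 = 10, C2 = 40; both sums stay below 256,
  // so (X + C1) u< (X + C2) agrees with C1 u< C2.
  APInt X(8, 200), C1(8, 10), C2(8, 40);
  assert(sumsCompareULT(X, C1, C2) == C1.ult(C2));
  return 0;
}
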
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
index bdc934ae11af..c2cbb96d4236 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
@@ -53,7 +53,7 @@ define void @needs_versioning_profitable(i32* %dst, i32* %src) {
; CHECK-NEXT: entry.slpmemcheck:
; CHECK-NEXT: [[DST16:%.*]] = bitcast i32* [[DST:%.*]] to i8*
; CHECK-NEXT: [[SRC18:%.*]] = bitcast i32* [[SRC:%.*]] to i8*
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[DST]], i64 2
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[DST]], i64 3
; CHECK-NEXT: [[SCEVGEP17:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[SCEVGEP19:%.*]] = getelementptr i32, i32* [[SRC]], i64 3
; CHECK-NEXT: [[SCEVGEP1920:%.*]] = bitcast i32* [[SCEVGEP19]] to i8*
@@ -62,23 +62,17 @@ define void @needs_versioning_profitable(i32* %dst, i32* %src) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[ENTRY_SCALAR:%.*]], label [[ENTRY_SLPVERSIONED:%.*]]
; CHECK: entry.slpversioned:
-; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC]], align 4, !alias.scope !5, !noalias !8
-; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
-; CHECK-NEXT: store i32 [[R_0]], i32* [[DST]], align 4, !alias.scope !8, !noalias !5
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
-; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
-; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
-; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC_GEP_2]] to <2 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST_GEP_2]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !5
; CHECK-NEXT: br label [[ENTRY_MERGE:%.*]]
; CHECK: entry.merge:
; CHECK-NEXT: ret void
@@ -156,7 +150,7 @@ define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly
; CHECK-NEXT: entry.slpmemcheck:
; CHECK-NEXT: [[OUT_BLOCK12:%.*]] = bitcast i32* [[OUT_BLOCK:%.*]] to i8*
; CHECK-NEXT: [[COUNTER14:%.*]] = bitcast i32* [[COUNTER:%.*]] to i8*
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 2
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 3
; CHECK-NEXT: [[SCEVGEP13:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i32, i32* [[COUNTER]], i64 3
; CHECK-NEXT: [[SCEVGEP1516:%.*]] = bitcast i32* [[SCEVGEP15]] to i8*
@@ -165,52 +159,44 @@ define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[ENTRY_SCALAR:%.*]], label [[ENTRY_SLPVERSIONED:%.*]]
; CHECK: entry.slpversioned:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[COUNTER]], align 4, !alias.scope !10, !noalias !13
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4, !alias.scope !13, !noalias !10
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i32 [[XOR]], i32* [[OUT_BLOCK]], align 4, !alias.scope !13, !noalias !10
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT: [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store i32 [[XOR_1]], i32* [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX_2]] to <2 x i32>*
-; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[COUNTER]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !alias.scope !10, !noalias !13
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2_2]] to <2 x i32>*
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[TMP7]], [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_2]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !13, !noalias !10
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4, !alias.scope !13, !noalias !10
; CHECK-NEXT: br label [[ENTRY_MERGE:%.*]]
; CHECK: entry.merge:
; CHECK-NEXT: ret void
; CHECK: entry.scalar:
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[COUNTER]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
-; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP11]], [[TMP10]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[COUNTER]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: store i32 [[XOR2]], i32* [[OUT_BLOCK]], align 4
; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
; CHECK-NEXT: [[ARRAYIDX2_14:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
-; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP13]], [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
+; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP9]], [[TMP8]]
; CHECK-NEXT: store i32 [[XOR_15]], i32* [[ARRAYIDX2_14]], align 4
; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
; CHECK-NEXT: [[ARRAYIDX2_27:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
-; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP15]], [[TMP14]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
+; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP11]], [[TMP10]]
; CHECK-NEXT: store i32 [[XOR_28]], i32* [[ARRAYIDX2_27]], align 4
; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
; CHECK-NEXT: [[ARRAYIDX2_310:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
-; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
+; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP13]], [[TMP12]]
; CHECK-NEXT: store i32 [[XOR_311]], i32* [[ARRAYIDX2_310]], align 4
; CHECK-NEXT: br label [[ENTRY_MERGE]]
;
@@ -387,10 +373,12 @@ define void @slp_not_beneficial(i32* %A, i32* %B) {
; CHECK-NEXT: bb.slpmemcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 4
; CHECK-NEXT: [[SCEVGEP6:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 4
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[A]], i64 5
; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP6]], [[SCEVGEP78]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP78]], [[SCEVGEP6]]
+; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 4
+; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast i32* [[SCEVGEP9]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP6]], [[SCEVGEP910]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP910]], [[SCEVGEP78]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 4
; CHECK-NEXT: store i32 0, i32* [[TMP2]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
index 4db147a53076..8e1bb67d33e8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
@@ -6,36 +6,55 @@ define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly
; CHECK-NEXT: entry.slpmemcheck:
; CHECK-NEXT: [[OUT_BLOCK12:%.*]] = bitcast i32* [[OUT_BLOCK:%.*]] to i8*
; CHECK-NEXT: [[COUNTER14:%.*]] = bitcast i32* [[COUNTER:%.*]] to i8*
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 3
; CHECK-NEXT: [[SCEVGEP13:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i32, i32* [[COUNTER]], i64 1
+; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i32, i32* [[COUNTER]], i64 3
; CHECK-NEXT: [[SCEVGEP1516:%.*]] = bitcast i32* [[SCEVGEP15]] to i8*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[OUT_BLOCK12]], [[SCEVGEP1516]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[COUNTER14]], [[SCEVGEP13]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[COUNTER]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
-; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[ENTRY_SCALAR:%.*]], label [[ENTRY_SLPVERSIONED:%.*]]
+; CHECK: entry.slpversioned:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[COUNTER]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: br label [[ENTRY_MERGE:%.*]]
+; CHECK: entry.merge:
+; CHECK-NEXT: ret void
+; CHECK: entry.scalar:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[COUNTER]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: store i32 [[XOR2]], i32* [[OUT_BLOCK]], align 4
; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
; CHECK-NEXT: [[ARRAYIDX2_14:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
-; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
+; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP9]], [[TMP8]]
; CHECK-NEXT: store i32 [[XOR_15]], i32* [[ARRAYIDX2_14]], align 4
; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
; CHECK-NEXT: [[ARRAYIDX2_27:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
-; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
+; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP11]], [[TMP10]]
; CHECK-NEXT: store i32 [[XOR_28]], i32* [[ARRAYIDX2_27]], align 4
; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
; CHECK-NEXT: [[ARRAYIDX2_310:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
-; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
+; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP13]], [[TMP12]]
; CHECK-NEXT: store i32 [[XOR_311]], i32* [[ARRAYIDX2_310]], align 4
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br label [[ENTRY_MERGE]]
;
entry:
%0 = load i32, i32* %counter, align 4
@@ -79,12 +98,14 @@ define void @delete_pointer_bound(float* %a, float* %b, i1 %c) #0 {
; CHECK-NEXT: call void @use(<8 x float> [[I71]])
; CHECK-NEXT: ret void
; CHECK: then.slpmemcheck:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 8
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 5
; CHECK-NEXT: [[SCEVGEP8:%.*]] = bitcast float* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr float, float* [[B]], i64 14
+; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr float, float* [[A]], i64 8
; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast float* [[SCEVGEP9]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP8]], [[SCEVGEP910]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP910]], [[SCEVGEP8]]
+; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr float, float* [[B]], i64 14
+; CHECK-NEXT: [[SCEVGEP1112:%.*]] = bitcast float* [[SCEVGEP11]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP8]], [[SCEVGEP1112]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP1112]], [[SCEVGEP910]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[A_83:%.*]] = getelementptr inbounds float, float* [[A]], i64 8
; CHECK-NEXT: store float 0.000000e+00, float* [[A_83]], align 4
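
The CHECK-line churn in the two test files is the downstream effect: once SCEV can order (X + C1)<nuw> against (X + C2)<nuw>, the umax/umin folds used to build each pointer group's bounds succeed, so the memcheck GEP offsets widen (e.g. i64 2 to i64 3) and SLP can version and vectorize four elements at a time instead of two. A conceptual sketch of that bound merging and of the BOUND0/BOUND1/FOUND_CONFLICT overlap test (hypothetical types and helper names, not LLVM's actual runtime-check API):

#include <algorithm>
#include <cstdint>

// Hypothetical byte range covered by a group of accesses off one base.
struct AccessRange {
  uintptr_t Start; // inclusive lower bound
  uintptr_t End;   // exclusive upper bound
};

// Folding two ranges into one requires proving which bound is larger;
// the SCEV change above supplies exactly that ordering when the bounds
// are (X + C1)<nuw> and (X + C2)<nuw>.
static AccessRange mergeRanges(AccessRange A, AccessRange B) {
  return {std::min(A.Start, B.Start), std::max(A.End, B.End)};
}

// One runtime no-overlap check between two merged groups, mirroring the
// BOUND0/BOUND1/FOUND_CONFLICT pattern in the CHECK lines.
static bool mayConflict(AccessRange A, AccessRange B) {
  bool Bound0 = A.Start < B.End; // icmp ult
  bool Bound1 = B.Start < A.End; // icmp ult
  return Bound0 && Bound1;       // and i1
}

int main() {
  AccessRange A0 = {0, 8}, A1 = {4, 12};    // two groups off the same base
  AccessRange Merged = mergeRanges(A0, A1); // {0, 12}
  AccessRange B = {16, 24};                 // disjoint second object
  return mayConflict(Merged, B) ? 1 : 0;    // returns 0: no conflict
}
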