[llvm] [Loads] Also consider getPointerAlignment when checking assumptions. (PR #120916)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 09:13:49 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/120916
>From 1ea548dc304a740b2ab69f4cd4bf852f2b7c89ad Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 22 Dec 2024 16:15:04 +0000
Subject: [PATCH 1/5] [Loads] Also use isAligned when checking assumptions.
Also use isAligned when trying to use alignment and dereferenceable
assumptions. This catches cases where dereferencable is known via the
assumption but alignment is known via isAligned (e.g. via argument
attribute or align of 1)
---
llvm/lib/Analysis/Loads.cpp | 10 +-
...able-info-from-assumption-constant-size.ll | 147 +++++++++++++-----
2 files changed, 111 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index bc03e4052a705b..a7999d51f69d53 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -170,9 +170,11 @@ static bool isDereferenceableAndAlignedPointer(
if (CtxI) {
/// Look through assumes to see if both dereferencability and alignment can
- /// be provent by an assume
+ /// be provent by an assume if needed.
RetainedKnowledge AlignRK;
RetainedKnowledge DerefRK;
+ APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
+ bool IsAligned = isAligned(V, Offset, Alignment, DL);
if (getKnowledgeForValue(
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
@@ -182,8 +184,10 @@ static bool isDereferenceableAndAlignedPointer(
AlignRK = std::max(AlignRK, RK);
if (RK.AttrKind == Attribute::Dereferenceable)
DerefRK = std::max(DerefRK, RK);
- if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
- DerefRK.ArgValue >= Size.getZExtValue())
+ if (!IsAligned && AlignRK &&
+ AlignRK.ArgValue >= Alignment.value())
+ return false;
+ if (DerefRK && DerefRK.ArgValue >= Size.getZExtValue())
return true; // We have found what we needed so we stop looking
return false; // Other assumes may have better information. so
// keep looking
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 1333451a0077e3..1636093a91a8e5 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -899,32 +899,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
@@ -1089,15 +1072,32 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP17]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -1168,32 +1168,93 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
+; CHECK: [[LOOP_THEN]]:
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 4
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %gep.a = getelementptr i32, ptr %a, i64 %iv
+ %l.a = load i32, ptr %gep.a, align 4
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ]
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
>From ce9186ba64dcddd874d3534532eb0edbf76551d4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 8 Jan 2025 15:12:07 +0000
Subject: [PATCH 2/5] !fixup update tests
---
...able-info-from-assumption-constant-size.ll | 120 +++---------------
1 file changed, 19 insertions(+), 101 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 1636093a91a8e5..103bd0fb8a2313 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -113,16 +113,29 @@ define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(p
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP15]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
@@ -1183,7 +1196,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -1205,7 +1218,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -1261,101 +1274,6 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP27:![0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
-;
-entry:
- call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ]
- br label %loop.header
-
-loop.header:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
- %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
- %l.b = load i32, ptr %gep.b, align 4
- %c.1 = icmp sge i32 %l.b, 0
- br i1 %c.1, label %loop.latch, label %loop.then
-
-loop.then:
- %gep.a = getelementptr i32, ptr %a, i64 %iv
- %l.a = load i32, ptr %gep.a, align 4
- br label %loop.latch
-
-loop.latch:
- %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
- %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
- store i32 %merge, ptr %gep.c, align 4
- %iv.next = add nuw nsw i64 %iv, 1
- %ec = icmp eq i64 %iv.next, 1000
- br i1 %ec, label %exit, label %loop.header
-
-exit:
- ret void
-}
-
-define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(
-; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ]
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
-; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
>From 79f2f2fea667fce2c491d8502143034179b92f9c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 8 Jan 2025 15:19:44 +0000
Subject: [PATCH 3/5] !fixup restore changes to address comments
---
llvm/lib/Analysis/Loads.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index a7999d51f69d53..d48f6b9f92fa21 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -170,11 +170,11 @@ static bool isDereferenceableAndAlignedPointer(
if (CtxI) {
/// Look through assumes to see if both dereferencability and alignment can
- /// be provent by an assume if needed.
+ /// be proven by an assume if needed.
RetainedKnowledge AlignRK;
RetainedKnowledge DerefRK;
APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
- bool IsAligned = isAligned(V, Offset, Alignment, DL);
+ bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
if (getKnowledgeForValue(
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
>From bae8cca80d099565a67279860e61cd9d0fdfe17b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 8 Jan 2025 15:20:46 +0000
Subject: [PATCH 4/5] !fixup remove dead Offset variable.
---
llvm/lib/Analysis/Loads.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index d48f6b9f92fa21..479057b4dabd0c 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -173,7 +173,6 @@ static bool isDereferenceableAndAlignedPointer(
/// be proven by an assume if needed.
RetainedKnowledge AlignRK;
RetainedKnowledge DerefRK;
- APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
if (getKnowledgeForValue(
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
>From 5019a297e6add09d17a5d65a4bd9e43d3f301e5f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 9 Jan 2025 12:41:19 +0000
Subject: [PATCH 5/5] !fixup Update IsAligned
---
llvm/lib/Analysis/Loads.cpp | 7 +-
...able-info-from-assumption-constant-size.ll | 71 ++++++++-----------
2 files changed, 32 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 479057b4dabd0c..7bbd469bd035d3 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -183,10 +183,9 @@ static bool isDereferenceableAndAlignedPointer(
AlignRK = std::max(AlignRK, RK);
if (RK.AttrKind == Attribute::Dereferenceable)
DerefRK = std::max(DerefRK, RK);
- if (!IsAligned && AlignRK &&
- AlignRK.ArgValue >= Alignment.value())
- return false;
- if (DerefRK && DerefRK.ArgValue >= Size.getZExtValue())
+ IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
+ if (IsAligned && DerefRK &&
+ DerefRK.ArgValue >= Size.getZExtValue())
return true; // We have found what we needed so we stop looking
return false; // Other assumes may have better information. so
// keep looking
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 103bd0fb8a2313..815e6bce52c0a1 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -104,8 +104,8 @@ exit:
ret void
}
-define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(
+define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; CHECK-LABEL: define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ]
@@ -113,29 +113,16 @@ define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(p
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP15]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
@@ -1085,32 +1072,15 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP17]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -1259,15 +1229,32 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
More information about the llvm-commits
mailing list