[llvm] [Loads] Also use isAligned when checking assumptions. (PR #120916)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 05:04:01 PST 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/120916
>From 1e1f376fe88776416e8d3caec88d02084474fbdc Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 22 Dec 2024 16:15:04 +0000
Subject: [PATCH 1/2] [Loads] Also use isAligned when checking assumptions.
Also use isAligned when trying to use alignment and dereferenceable
assumptions. This catches cases where dereferencable is known via the
assumption but alignment is known via isAligned (e.g. via argument
attribute or align of 1)
---
llvm/lib/Analysis/Loads.cpp | 10 +-
...able-info-from-assumption-constant-size.ll | 92 ++++++-------------
2 files changed, 36 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 54b9521fda8fd2..c56a45aa32ae6f 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -173,9 +173,11 @@ static bool isDereferenceableAndAlignedPointer(
if (CtxI) {
/// Look through assumes to see if both dereferencability and alignment can
- /// be provent by an assume
+ /// be provent by an assume if needed.
RetainedKnowledge AlignRK;
RetainedKnowledge DerefRK;
+ APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
+ bool IsAligned = isAligned(V, Offset, Alignment, DL);
if (getKnowledgeForValue(
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
@@ -185,8 +187,10 @@ static bool isDereferenceableAndAlignedPointer(
AlignRK = std::max(AlignRK, RK);
if (RK.AttrKind == Attribute::Dereferenceable)
DerefRK = std::max(DerefRK, RK);
- if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
- DerefRK.ArgValue >= Size.getZExtValue())
+ if (!IsAligned && AlignRK &&
+ AlignRK.ArgValue >= Alignment.value())
+ return false;
+ if (DerefRK && DerefRK.ArgValue >= Size.getZExtValue())
return true; // We have found what we needed so we stop looking
return false; // Other assumes may have better information. so
// keep looking
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 8baff7b33118bf..6eee3be683580a 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -822,32 +822,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
@@ -1012,15 +995,32 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP17]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -1091,32 +1091,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
@@ -1186,32 +1169,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
>From dc78ea64bbc091b7448fefd4340259d31579c2a9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 23 Dec 2024 13:02:08 +0000
Subject: [PATCH 2/2] !fixup address latest comments, thanks!
---
llvm/lib/Analysis/Loads.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index c56a45aa32ae6f..d322bc4175d21a 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -173,11 +173,11 @@ static bool isDereferenceableAndAlignedPointer(
if (CtxI) {
/// Look through assumes to see if both dereferencability and alignment can
- /// be provent by an assume if needed.
+ /// be proven by an assume if needed.
RetainedKnowledge AlignRK;
RetainedKnowledge DerefRK;
APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
- bool IsAligned = isAligned(V, Offset, Alignment, DL);
+ bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
if (getKnowledgeForValue(
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
More information about the llvm-commits
mailing list