[llvm] 8b1c4e3 - [BasicAA] Handle two unknown sizes for GEPs

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 11 09:46:18 PST 2020


Author: Nikita Popov
Date: 2020-12-11T18:45:53+01:00
New Revision: 8b1c4e310c2f9686cad925ad81d8e2be10a1ef3c

URL: https://github.com/llvm/llvm-project/commit/8b1c4e310c2f9686cad925ad81d8e2be10a1ef3c
DIFF: https://github.com/llvm/llvm-project/commit/8b1c4e310c2f9686cad925ad81d8e2be10a1ef3c.diff

LOG: [BasicAA] Handle two unknown sizes for GEPs

If we have two unknown sizes and one GEP operand and one non-GEP
operand, then we currently simply return MayAlias. The comment says
we can't do anything useful ... but we can! We can still check that
the underlying objects are different (and do so for the GEP-GEP case).

To reduce the compile-time impact, this a) checks this early, before
doing the relatively expensive GEP decomposition that will not be
used and b) doesn't do the check if the other operand is a phi or
select. In that case, the phi/select will already recurse, so this
would just do two slightly different recursive walks that arrive at
the same roots.

Compile-time is still a bit of a mixed bag: https://llvm-compile-time-tracker.com/compare.php?from=624af932a808b363a888139beca49f57313d9a3b&to=845356e14adbe651a553ed11318ddb5e79a24bcd&stat=instructions
On average this is a small improvement, but sqlite with ThinLTO has
a 0.5% regression (lencod has a 1% improvement).

The BasicAA test case checks this by using two memsets with unknown
size. However, the more interesting case where this is useful is
the LoopVectorize test case, as analysis of accesses in loops tends
to always us unknown sizes.

Differential Revision: https://reviews.llvm.org/D92401

Added: 
    

Modified: 
    llvm/lib/Analysis/BasicAliasAnalysis.cpp
    llvm/test/Analysis/BasicAA/phi-aa.ll
    llvm/test/Analysis/BasicAA/recphi.ll
    llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
    llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 7e48b7303bef..caf540bb7fd9 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1098,6 +1098,22 @@ AliasResult BasicAAResult::aliasGEP(
     const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
     const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
     const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
+  // If both accesses are unknown size, we can only check whether the
+  // underlying objects are 
diff erent.
+  if (!V1Size.hasValue() && !V2Size.hasValue()) {
+    // If the other operand is a phi/select, let phi/select handling perform
+    // this check. Otherwise the same recursive walk is done twice.
+    if (!isa<PHINode>(V2) && !isa<SelectInst>(V2)) {
+      AliasResult BaseAlias =
+          aliasCheck(UnderlyingV1, LocationSize::beforeOrAfterPointer(),
+                     AAMDNodes(), UnderlyingV2,
+                     LocationSize::beforeOrAfterPointer(), AAMDNodes(), AAQI);
+      if (BaseAlias == NoAlias)
+        return NoAlias;
+    }
+    return MayAlias;
+  }
+
   DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
   DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
 
@@ -1156,10 +1172,6 @@ AliasResult BasicAAResult::aliasGEP(
     // instruction.  If one pointer is a GEP with a non-zero index of the other
     // pointer, we know they cannot alias.
 
-    // If both accesses are unknown size, we can't do anything useful here.
-    if (!V1Size.hasValue() && !V2Size.hasValue())
-      return MayAlias;
-
     AliasResult R = aliasCheck(
         UnderlyingV1, LocationSize::beforeOrAfterPointer(), AAMDNodes(),
         V2, V2Size, V2AAInfo, AAQI, nullptr, UnderlyingV2);

diff  --git a/llvm/test/Analysis/BasicAA/phi-aa.ll b/llvm/test/Analysis/BasicAA/phi-aa.ll
index bf5915f1e0fe..3446d0bef849 100644
--- a/llvm/test/Analysis/BasicAA/phi-aa.ll
+++ b/llvm/test/Analysis/BasicAA/phi-aa.ll
@@ -152,8 +152,7 @@ loop:
 }
 
 ; CHECK-LABEL: phi_and_gep_unknown_size
-; CHECK: Just Mod:   call void @llvm.memset.p0i8.i32(i8* %g, i8 0, i32 %size, i1 false) <->   call void @llvm.memset.p0i8.i32(i8* %z, i8 0, i32 %size, i1 false)
-; TODO: This should be NoModRef.
+; CHECK: NoModRef:   call void @llvm.memset.p0i8.i32(i8* %g, i8 0, i32 %size, i1 false) <->   call void @llvm.memset.p0i8.i32(i8* %z, i8 0, i32 %size, i1 false)
 define void @phi_and_gep_unknown_size(i1 %c, i8* %x, i8* %y, i8* noalias %z, i32 %size) {
 entry:
   br i1 %c, label %true, label %false

diff  --git a/llvm/test/Analysis/BasicAA/recphi.ll b/llvm/test/Analysis/BasicAA/recphi.ll
index 7f8bc9d7b668..882128a2daf0 100644
--- a/llvm/test/Analysis/BasicAA/recphi.ll
+++ b/llvm/test/Analysis/BasicAA/recphi.ll
@@ -267,9 +267,8 @@ exit:
 ; CHECK: NoAlias:  i8* %a, i8* %p.base
 ; CHECK: NoAlias:  i8* %a, i8* %p.outer
 ; CHECK: NoAlias:  i8* %a, i8* %p.outer.next
-; CHECK: MayAlias: i8* %a, i8* %p.inner
+; CHECK: NoAlias:  i8* %a, i8* %p.inner
 ; CHECK: NoAlias:  i8* %a, i8* %p.inner.next
-; TODO: (a, p.inner) could be NoAlias
 define void @nested_loop2(i1 %c, i1 %c2, i8* noalias %p.base) {
 entry:
   %a = alloca i8

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index 4c66825f6565..21c0d8be8a37 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -876,62 +876,31 @@ for.body:
 define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias nocapture %z) {
 ; CHECK-LABEL: @mult_ptr_iv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Z:%.*]], i32 3000
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[X:%.*]], i32 3000
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i8* [[SCEVGEP1]], [[Z]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i8* [[SCEVGEP]], [[X]]
-; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[X]], i32 3000
-; CHECK-NEXT:    [[IND_END3:%.*]] = getelementptr i8, i8* [[Z]], i32 3000
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[POINTER_PHI5:%.*]] = phi i8* [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI4:%.*]] = phi i8* [ [[Z:%.*]], [[ENTRY]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI5]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 1
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef), !alias.scope !26
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 2
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef), !alias.scope !26
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef), !alias.scope !26
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], <i8 10, i8 10, i8 10, i8 10>
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]]
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 1
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !29, !noalias !26
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 2
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !29, !noalias !26
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !29, !noalias !26
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i32 12
-; CHECK-NEXT:    [[PTR_IND6]] = getelementptr i8, i8* [[POINTER_PHI5]], i32 12
-; CHECK-NEXT:    br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP31:!llvm.loop !.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[X_ADDR_050:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[Z_ADDR_049:%.*]] = phi i8* [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ]
-; CHECK-NEXT:    [[I_048:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[X_ADDR_050]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[TMP10]], 10
-; CHECK-NEXT:    [[MUL1:%.*]] = mul i8 [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[MUL2:%.*]] = mul i8 [[TMP10]], [[TMP12]]
-; CHECK-NEXT:    [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 1
-; CHECK-NEXT:    store i8 [[MUL]], i8* [[Z_ADDR_049]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 2
-; CHECK-NEXT:    store i8 [[MUL1]], i8* [[INCDEC_PTR32]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR34]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 3
-; CHECK-NEXT:    store i8 [[MUL2]], i8* [[INCDEC_PTR33]], align 1
-; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_048]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], [[LOOP32:!llvm.loop !.*]]
+; CHECK-NEXT:    [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i32 12
+; CHECK-NEXT:    br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
index 960c890516c8..afb5ecfa0288 100644
--- a/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
+++ b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
@@ -12,7 +12,7 @@
 
 @c1 = external global i16
 
-define void @f(i16 %a) {
+define void @f(i16 %a, [1 x i32]* %p) {
   br label %bb0
 
 bb0:
@@ -24,9 +24,9 @@ bb1:
 
 bb2:
   %tmp2 = phi i16 [ %tmp1, %bb1 ], [ %tmp3, %bb2 ]
-  %tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 4
+  %tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* %p, i32 0, i32 4
   store i32 1, i32* %tmp4
-  %tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 9
+  %tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* %p, i32 0, i32 9
   store i32 0, i32* %tmp5
   %tmp3 = add i16 %tmp2, 1
   store i16 %tmp2, i16* @c1


        


More information about the llvm-commits mailing list