[llvm] 9030d90 - [instcombine] Add coverage for consistent use of unescaped malloc case

Philip Reames via llvm-commits <llvm-commits@lists.llvm.org>
Tue Feb 22 16:22:03 PST 2022


Author: Philip Reames
Date: 2022-02-22T16:21:56-08:00
New Revision: 9030d90aeb842c43a9e7d44bbf280dca250a72d9

URL: https://github.com/llvm/llvm-project/commit/9030d90aeb842c43a9e7d44bbf280dca250a72d9
DIFF: https://github.com/llvm/llvm-project/commit/9030d90aeb842c43a9e7d44bbf280dca250a72d9.diff

LOG: [instcombine] Add coverage for consistent use of unescaped malloc case

Added: 
    

Modified: 
    llvm/test/Transforms/InstCombine/compare-alloca.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/InstCombine/compare-alloca.ll b/llvm/test/Transforms/InstCombine/compare-alloca.ll
index a9e10724805a..21ec6cac3d68 100644
--- a/llvm/test/Transforms/InstCombine/compare-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/compare-alloca.ll
@@ -118,3 +118,169 @@ define i1 @alloca_call_compare() {
   %cmp = icmp eq i64* %p, %q
   ret i1 %cmp
 }
+
+
+; The next block of tests demonstrates a very subtle correctness requirement.
+; We can generally assume any *single* stack layout we choose for the result
+; of an alloca, but we can't simultaneously assume two different ones.  As a
+; result, we must make sure that we only fold conditions if we can ensure
+; that we fold *all* potentially address-capturing compares the same way.
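+;
+; A rough C-level sketch of the hazard (hypothetical code, not part of this
+; commit): if the two compares below were folded inconsistently, the program
+; could observe the "same" pointer comparison as both false and true.
+;
+;   char m[4], n[4];
+;   _Bool c1 = (m == n + 4);       // folding this to false picks a layout
+;                                  // where m does not directly follow n
+;   _Bool c2 = (m == escaped(n));  // 'escaped' (hypothetical) returns n + 4
+;                                  // via a route the optimizer can't see,
+;                                  // so this compare observes the real layout
+;   return c1 | c2;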
+
+; These two functions represent either a) forging a pointer via inttoptr or
+; b) indexing off an adjacent allocation.  In either case, the operation is
+; obscured by an uninlined helper and is not visible to instcombine.
+declare i8* @hidden_inttoptr()
+declare i8* @hidden_offset(i8* %other)
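+;
+; Hypothetical definitions these declarations might hide (illustrative only;
+; the tests rely on the bodies being uninlined and opaque to the optimizer):
+;
+;   char *hidden_inttoptr(void)      { return (char *)2048; }
+;   char *hidden_offset(char *other) { return other + 4; }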
+
+define i1 @ptrtoint_single_cmp() {
+; CHECK-LABEL: @ptrtoint_single_cmp(
+; CHECK-NEXT:    ret i1 false
+;
+  %m = alloca i8, i32 4
+  %rhs = inttoptr i64 2048 to i8*
+  %cmp = icmp eq i8* %m, %rhs
+  ret i1 %cmp
+}
+
+define i1 @offset_single_cmp() {
+; CHECK-LABEL: @offset_single_cmp(
+; CHECK-NEXT:    ret i1 false
+;
+  %m = alloca i8, i32 4
+  %n = alloca i8, i32 4
+  %rhs = getelementptr i8, i8* %n, i32 4
+  %cmp = icmp eq i8* %m, %rhs
+  ret i1 %cmp
+}
+
+define i1 @neg_consistent_fold1() {
+; CHECK-LABEL: @neg_consistent_fold1(
+; CHECK-NEXT:    [[M1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[M1_SUB:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[M1]], i32 0, i32 0
+; CHECK-NEXT:    [[RHS2:%.*]] = call i8* @hidden_inttoptr()
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8* [[M1_SUB]], inttoptr (i64 2048 to i8*)
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i8* [[M1_SUB]], [[RHS2]]
+; CHECK-NEXT:    [[RES:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %m = alloca i8, i32 4
+  %rhs = inttoptr i64 2048 to i8*
+  %rhs2 = call i8* @hidden_inttoptr()
+  %cmp1 = icmp eq i8* %m, %rhs
+  %cmp2 = icmp eq i8* %m, %rhs2
+  %res = or i1 %cmp1, %cmp2
+  ret i1 %res
+}
+
+define i1 @neg_consistent_fold2() {
+; CHECK-LABEL: @neg_consistent_fold2(
+; CHECK-NEXT:    [[M1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[N2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[N2_SUB:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[N2]], i32 0, i32 0
+; CHECK-NEXT:    [[M1_SUB:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[M1]], i32 0, i32 0
+; CHECK-NEXT:    [[RHS:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[N2]], i32 0, i32 4
+; CHECK-NEXT:    [[RHS2:%.*]] = call i8* @hidden_offset(i8* nonnull [[N2_SUB]])
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8* [[M1_SUB]], [[RHS]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i8* [[M1_SUB]], [[RHS2]]
+; CHECK-NEXT:    [[RES:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %m = alloca i8, i32 4
+  %n = alloca i8, i32 4
+  %rhs = getelementptr i8, i8* %n, i32 4
+  %rhs2 = call i8* @hidden_offset(i8* %n)
+  %cmp1 = icmp eq i8* %m, %rhs
+  %cmp2 = icmp eq i8* %m, %rhs2
+  %res = or i1 %cmp1, %cmp2
+  ret i1 %res
+}
+
+define i1 @neg_consistent_fold3() {
+; CHECK-LABEL: @neg_consistent_fold3(
+; CHECK-NEXT:    [[M1:%.*]] = alloca i32, align 1
+; CHECK-NEXT:    [[M1_SUB:%.*]] = bitcast i32* [[M1]] to i8*
+; CHECK-NEXT:    [[LGP:%.*]] = load i32*, i32** @gp, align 8
+; CHECK-NEXT:    [[RHS2:%.*]] = call i8* @hidden_inttoptr()
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32* [[M1]], [[LGP]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i8* [[RHS2]], [[M1_SUB]]
+; CHECK-NEXT:    [[RES:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %m = alloca i8, i32 4
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8
+  %rhs2 = call i8* @hidden_inttoptr()
+  %cmp1 = icmp eq i32* %bc, %lgp
+  %cmp2 = icmp eq i8* %m, %rhs2
+  %res = or i1 %cmp1, %cmp2
+  ret i1 %res
+}
+
+define i1 @neg_consistent_fold4() {
+; CHECK-LABEL: @neg_consistent_fold4(
+; CHECK-NEXT:    ret i1 false
+;
+  %m = alloca i8, i32 4
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8
+  %cmp1 = icmp eq i32* %bc, %lgp
+  %cmp2 = icmp eq i32* %bc, %lgp
+  %res = or i1 %cmp1, %cmp2
+  ret i1 %res
+}
+
+; A nocapture call can't cause a consistency issue for these folds, as it is
+; (by assumption) unable to contain a comparison which might capture the
+; address.
+
+declare void @unknown(i8*)
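+;
+; For illustration (hypothetical body, not part of this commit), a callee
+; honoring nocapture may use the pointer but must not retain, publish, or
+; compare it:
+;
+;   void unknown(char *p) { p[0] = 1; }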
+
+; TODO: Missing optimization
+define i1 @consistent_nocapture_inttoptr() {
+; CHECK-LABEL: @consistent_nocapture_inttoptr(
+; CHECK-NEXT:    [[M1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[M1_SUB:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[M1]], i32 0, i32 0
+; CHECK-NEXT:    call void @unknown(i8* nocapture nonnull [[M1_SUB]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[M1_SUB]], inttoptr (i64 2048 to i8*)
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %m = alloca i8, i32 4
+  call void @unknown(i8* nocapture %m)
+  %rhs = inttoptr i64 2048 to i8*
+  %cmp = icmp eq i8* %m, %rhs
+  ret i1 %cmp
+}
+
+define i1 @consistent_nocapture_offset() {
+; CHECK-LABEL: @consistent_nocapture_offset(
+; CHECK-NEXT:    [[M1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[M1_SUB:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[M1]], i32 0, i32 0
+; CHECK-NEXT:    call void @unknown(i8* nocapture nonnull [[M1_SUB]])
+; CHECK-NEXT:    ret i1 false
+;
+  %m = alloca i8, i32 4
+  call void @unknown(i8* nocapture %m)
+  %n = alloca i8, i32 4
+  %rhs = getelementptr i8, i8* %n, i32 4
+  %cmp = icmp eq i8* %m, %rhs
+  ret i1 %cmp
+}
+
+@gp = global i32* null, align 8
+; TODO: Missing optimization
+define i1 @consistent_nocapture_through_global() {
+; CHECK-LABEL: @consistent_nocapture_through_global(
+; CHECK-NEXT:    [[M1:%.*]] = alloca i32, align 1
+; CHECK-NEXT:    [[M1_SUB:%.*]] = bitcast i32* [[M1]] to i8*
+; CHECK-NEXT:    call void @unknown(i8* nocapture nonnull [[M1_SUB]])
+; CHECK-NEXT:    [[LGP:%.*]] = load i32*, i32** @gp, align 8, !nonnull !0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[M1]], [[LGP]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %m = alloca i8, i32 4
+  call void @unknown(i8* nocapture %m)
+  %bc = bitcast i8* %m to i32*
+  %lgp = load i32*, i32** @gp, align 8, !nonnull !{}
+  %cmp = icmp eq i32* %bc, %lgp
+  ret i1 %cmp
+}