[llvm] r322533 - [SROA] fix assetion failure

Mon Jan 15 22:23:05 PST 2018

Author: inouehrs
Date: Mon Jan 15 22:23:05 2018
New Revision: 322533

URL: http://llvm.org/viewvc/llvm-project?rev=322533&view=rev
Log:
[SROA] fix assetion failure

This patch fixes the assertion failure in SROA reported in PR35657.
PR35657 reports the assertion failure due to r319522 (splitting for non-whole-alloca slices), but this problem can happen even without r319522.

The problem exists in a check for reusing an existing alloca when rewriting partitions. As the original comment said, we can reuse the existing alloca if the new alloca has the same type and offset with the existing one. But the code checks only type of the alloca and then check the offset using an assert.
In a corner case with out-of-bounds access (e.g. @PR35657 function added in unit test), it is possible that the two allocas have the same type but different offsets.

This patch makes the check of the offset in the if condition, and re-enables the splitting for non-whole-alloca slices.

Differential Revision: https://reviews.llvm.org/D41981


Modified:
    llvm/trunk/lib/Transforms/Scalar/SROA.cpp
    llvm/trunk/test/DebugInfo/X86/sroasplit-2.ll
    llvm/trunk/test/Transforms/SROA/basictest.ll
    llvm/trunk/test/Transforms/SROA/big-endian.ll

Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=322533&r1=322532&r2=322533&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Mon Jan 15 22:23:05 2018
@@ -124,11 +124,6 @@ static cl::opt<bool> SROARandomShuffleSl
 static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
                                         cl::Hidden);
 
-/// Hidden option to allow more aggressive splitting.
-static cl::opt<bool>
-SROASplitNonWholeAllocaSlices("sroa-split-nonwhole-alloca-slices",
-                              cl::init(false), cl::Hidden);
-
 namespace {
 
 /// \brief A custom IRBuilder inserter which prefixes all names, but only in
@@ -3931,10 +3926,10 @@ AllocaInst *SROA::rewritePartition(Alloc
   // exact same type as the original, and with the same access offsets. In that
   // case, re-use the existing alloca, but still run through the rewriter to
   // perform phi and select speculation.
+  // P.beginOffset() can be non-zero even with the same type in a case with
+  // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
   AllocaInst *NewAI;
-  if (SliceTy == AI.getAllocatedType()) {
-    assert(P.beginOffset() == 0 &&
-           "Non-zero begin offset but same alloca type");
+  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
     NewAI = &AI;
     // FIXME: We should be able to bail at this point with "nothing changed".
     // FIXME: We might want to defer PHI speculation until after here.
@@ -4060,7 +4055,7 @@ bool SROA::splitAlloca(AllocaInst &AI, A
 
   uint64_t AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType());
   const uint64_t MaxBitVectorSize = 1024;
-  if (SROASplitNonWholeAllocaSlices && AllocaSize <= MaxBitVectorSize) {
+  if (AllocaSize <= MaxBitVectorSize) {
     // If a byte boundary is included in any load or store, a slice starting or
     // ending at the boundary is not splittable.
     SmallBitVector SplittableOffset(AllocaSize + 1, true);

Modified: llvm/trunk/test/DebugInfo/X86/sroasplit-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/sroasplit-2.ll?rev=322533&r1=322532&r2=322533&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/sroasplit-2.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/sroasplit-2.ll Mon Jan 15 22:23:05 2018
@@ -21,7 +21,8 @@
 
 ; Verify that SROA creates a variable piece when splitting i1.
 ; CHECK:  call void @llvm.dbg.value(metadata i64 %outer.coerce0, metadata ![[O:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)),
-; CHECK:  call void @llvm.dbg.value(metadata i64 %outer.coerce1, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)),
+; CHECK:  call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32)),
+; CHECK:  call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)),
 ; CHECK:  call void @llvm.dbg.value({{.*}}, metadata ![[I1:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)),
 ; CHECK-DAG: ![[O]] = !DILocalVariable(name: "outer",{{.*}} line: 10
 ; CHECK-DAG: ![[I1]] = !DILocalVariable(name: "i1",{{.*}} line: 11

Modified: llvm/trunk/test/Transforms/SROA/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/basictest.ll?rev=322533&r1=322532&r2=322533&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SROA/basictest.ll (original)
+++ llvm/trunk/test/Transforms/SROA/basictest.ll Mon Jan 15 22:23:05 2018
@@ -1615,13 +1615,13 @@ define i16 @PR24463() {
 ; Ensure we can handle a very interesting case where there is an integer-based
 ; rewrite of the uses of the alloca, but where one of the integers in that is
 ; a sub-integer that requires extraction *and* extends past the end of the
-; alloca. In this case, we should extract the i8 and then zext it to i16.
+; alloca. SROA can split the alloca to avoid shift or trunc.
 ;
 ; CHECK-LABEL: @PR24463(
 ; CHECK-NOT: alloca
-; CHECK: %[[SHIFT:.*]] = lshr i16 0, 8
-; CHECK: %[[TRUNC:.*]] = trunc i16 %[[SHIFT]] to i8
-; CHECK: %[[ZEXT:.*]] = zext i8 %[[TRUNC]] to i16
+; CHECK-NOT: trunc
+; CHECK-NOT: lshr
+; CHECK: %[[ZEXT:.*]] = zext i8 {{.*}} to i16
 ; CHECK: ret i16 %[[ZEXT]]
 entry:
   %alloca = alloca [3 x i8]
@@ -1695,3 +1695,52 @@ bb1:
   call void @llvm.lifetime.end.p0i8(i64 2, i8* %0)
   ret void
 }
+
+; PR35657 reports assertion failure with this code
+define void @PR35657(i64 %v) {
+; CHECK-LABEL: @PR35657
+; CHECK: call void @callee16(i16 %{{.*}})
+; CHECK: call void @callee48(i48 %{{.*}})
+; CHECK: ret void
+entry:
+  %a48 = alloca i48
+  %a48.cast64 = bitcast i48* %a48 to i64*
+  store i64 %v, i64* %a48.cast64
+  %a48.cast16 = bitcast i48* %a48 to i16*
+  %b0_15 = load i16, i16* %a48.cast16
+  %a48.cast8 = bitcast i48* %a48 to i8*
+  %a48_offset2 = getelementptr inbounds i8, i8* %a48.cast8, i64 2
+  %a48_offset2.cast48 = bitcast i8* %a48_offset2 to i48*
+  %b16_63 = load i48, i48* %a48_offset2.cast48, align 2
+  call void @callee16(i16 %b0_15)
+  call void @callee48(i48 %b16_63)
+  ret void
+}
+
+declare void @callee16(i16 %a)
+declare void @callee48(i48 %a)
+
+define void @test28(i64 %v) #0 {
+; SROA should split the first i64 store to avoid additional and/or instructions
+; when storing into i32 fields
+
+; CHECK-LABEL: @test28(
+; CHECK-NOT: alloca
+; CHECK-NOT: and
+; CHECK-NOT: or
+; CHECK:      %[[shift:.*]] = lshr i64 %v, 32
+; CHECK-NEXT: %{{.*}} = trunc i64 %[[shift]] to i32
+; CHECK-NEXT: ret void
+
+entry:
+  %t = alloca { i64, i32, i32 }
+
+  %b = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 1
+  %0 = bitcast i32* %b to i64*
+  store i64 %v, i64* %0
+
+  %1 = load i32, i32* %b
+  %c = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 2
+  store i32 %1, i32* %c
+  ret void
+}

Modified: llvm/trunk/test/Transforms/SROA/big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/big-endian.ll?rev=322533&r1=322532&r2=322533&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SROA/big-endian.ll (original)
+++ llvm/trunk/test/Transforms/SROA/big-endian.ll Mon Jan 15 22:23:05 2018
@@ -83,19 +83,34 @@ entry:
   store i16 1, i16* %a0i16ptr
 
   store i8 1, i8* %a2ptr
-; CHECK:      %[[mask1:.*]] = and i40 undef, 4294967295
-; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
 
   %a3i24ptr = bitcast i8* %a3ptr to i24*
   store i24 1, i24* %a3i24ptr
-; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
-; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256
 
   %a2i40ptr = bitcast i8* %a2ptr to i40*
   store i40 1, i40* %a2i40ptr
-; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56
-; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
-; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
+
+; the alloca is splitted into multiple slices
+; Here, i8 1 is for %a[6]
+; CHECK: %[[ext1:.*]] = zext i8 1 to i40
+; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256
+; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]]
+
+; Here, i24 0 is for %a[3] to %a[5]
+; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40
+; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8
+; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
+; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]]
+
+; Here, i8 0 is for %a[2]
+; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40
+; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32
+; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295
+; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]]
+
+; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56
+; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]]
 
 ; CHECK-NOT: store
 ; CHECK-NOT: load
@@ -104,11 +119,12 @@ entry:
   %ai = load i56, i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
-; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56
-; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
-; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
-; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
-; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
+; Here, i16 1 is for %a[0] to %a[1]
+; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56
+; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40
+; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 1099511627775
+; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]]
+; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64
 ; CHECK-NEXT: ret i64 %[[ret]]
 }