[PATCH] Fix SROA regression causing data corruption, fixes PR19250

Duncan P. N. Exon Smith dexonsmith at apple.com
Mon Jun 16 17:32:54 PDT 2014


> On 2014-Jun-16, at 15:30, Björn Steinbrink <bsteinbr at gmail.com> wrote:
> 
> Hi Duncan,
> 
> I seem to be unable to get the review system to add you to the list of
> mail receivers. Please excuse this extra mail.

No problem -- thanks for the ping.  I don't use Phabricator much, so
there's a chance my account isn't behaving correctly and I haven't even
noticed.  Anyway, I prefer email.

> I had already added the
> requested comment and extra test for the order indepence last week, but
> happened to re-read the commit message today and rewrote that to be
> (hopefully) more correct / easier to understand. I also simplified the
> original testcase a bit.

LGTM with a couple of style fixups (that I did myself).

  - Changed the test filenames to use hyphens.
  - Slightly changed the comment you added.

Committed in r211082!

> 
> Greetings
> Björn
> 
> PS: I should probably mention that I will need someone to commit this
> for me as I do not have commit access.
> 
> On 2014.06.16 22:16:49 +0000, Björn Steinbrink wrote:
>> Fix SROA regression causing miscompilation, fixes PR19250
>> 
>> Summary:
>> r199771 accidently changed the logic that excludes integer types that
>> are not a byte width multiple from being used as the type used for a
>> newly formed partition.
>> 
>> This means that we can, for example, end up with a partition that spans
>> a single byte and has two users, one load with type i1 and another one
>> with type i32. The latter is considered to be splittable and expands
>> beyond the partition.
>> 
>> Now in findCommonType, i1 is assigned to Ty before the check that
>> rejects i1 because its width is not a byte width multiple. And because
>> it is the only type that spans the whole partition, TyIsCommon remains
>> true and i1 is returned as the common type.
>> 
>> Since such types are not actually supported, this causes some bits to be
>> zeroed out in the load of the i32 value, because the first byte gets
>> truncated to i1 and then zero-extended.
>> 
>> To fix this problem, we must make sure to check for non-byte width types
>> first and always reject them.
>> 
>> Reviewers: dexonsmith
>> Cc: aprantl
>> 
>> Differential Revision: http://reviews.llvm.org/D3297
>> 
>> http://reviews.llvm.org/D3297
>> 
>> Files:
>>  lib/Transforms/Scalar/SROA.cpp
>>  test/Transforms/SROA/order_independence.ll
>>  test/Transforms/SROA/slicewidth.ll
> 
>> Index: lib/Transforms/Scalar/SROA.cpp
>> ===================================================================
>> --- lib/Transforms/Scalar/SROA.cpp
>> +++ lib/Transforms/Scalar/SROA.cpp
>> @@ -1032,11 +1032,6 @@
>>       UserTy = SI->getValueOperand()->getType();
>>     }
>> 
>> -    if (!UserTy || (Ty && Ty != UserTy))
>> -      TyIsCommon = false; // Give up on anything but an iN type.
>> -    else
>> -      Ty = UserTy;
>> -
>>     if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
>>       // If the type is larger than the partition, skip it. We only encounter
>>       // this for split integer operations where we want to use the type of the
>> @@ -1051,6 +1046,13 @@
>>       if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
>>         ITy = UserITy;
>>     }
>> +
>> +    // To keep thing independent from the order of slices, Ty and TyIsCommon
>> +    // must not depend on types skipped above
>> +    if (!UserTy || (Ty && Ty != UserTy))
>> +      TyIsCommon = false; // Give up on anything but an iN type.
>> +    else
>> +      Ty = UserTy;
>>   }
>> 
>>   return TyIsCommon ? Ty : ITy;
>> Index: test/Transforms/SROA/order_independence.ll
>> ===================================================================
>> --- /dev/null
>> +++ test/Transforms/SROA/order_independence.ll
>> @@ -0,0 +1,38 @@
>> +; RUN: opt < %s -sroa -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
>> +
>> +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
>> +
>> +; Check that the chosen type for a split is independent from the order of
>> +; slices even in case of types that are skipped because their width is not a
>> +; byte width multiple
>> +
>> +define void @skipped_inttype_first({ i16*, i32 }*) {
>> +; CHECK-LABEL: @skipped_inttype_first
>> +; CHECK: alloca i8*
>> +  %arg = alloca { i16*, i32 }, align 8
>> +  %2 = bitcast { i16*, i32 }* %0 to i8*
>> +  %3 = bitcast { i16*, i32 }* %arg to i8*
>> +  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
>> +  %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
>> +  %pb0 = bitcast i16** %b to i63*
>> +  %b0 = load i63* %pb0
>> +  %pb1 = bitcast i16** %b to i8**
>> +  %b1 = load i8** %pb1
>> +  ret void
>> +}
>> +
>> +define void @skipped_inttype_last({ i16*, i32 }*) {
>> +; CHECK-LABEL: @skipped_inttype_last
>> +; CHECK: alloca i8*
>> +  %arg = alloca { i16*, i32 }, align 8
>> +  %2 = bitcast { i16*, i32 }* %0 to i8*
>> +  %3 = bitcast { i16*, i32 }* %arg to i8*
>> +  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
>> +  %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
>> +  %pb1 = bitcast i16** %b to i8**
>> +  %b1 = load i8** %pb1
>> +  %pb0 = bitcast i16** %b to i63*
>> +  %b0 = load i63* %pb0
>> +  ret void
>> +}
>> Index: test/Transforms/SROA/slicewidth.ll
>> ===================================================================
>> --- /dev/null
>> +++ test/Transforms/SROA/slicewidth.ll
>> @@ -0,0 +1,25 @@
>> +; RUN: opt < %s -sroa -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
>> +
>> +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
>> +
>> +define void @no_split_on_non_byte_width(i32) {
>> +; This tests that allocas are not split into slices that are not byte width multiple
>> +  %arg = alloca i32 , align 8
>> +  store i32 %0, i32* %arg
>> +  br label %load_i32
>> +
>> +load_i32:
>> +; CHECK-LABEL: load_i32:
>> +; CHECK-NOT: bitcast {{.*}} to i1
>> +; CHECK-NOT: zext i1
>> +  %r0 = load i32* %arg
>> +  br label %load_i1
>> +
>> +load_i1:
>> +; CHECK-LABEL: load_i1:
>> +; CHECK: bitcast {{.*}} to i1
>> +  %p1 = bitcast i32* %arg to i1*
>> +  %t1 = load i1* %p1
>> +  ret void
>> +}





More information about the llvm-commits mailing list