[PATCH] Fix SROA regression causing data corruption, fixes PR19250
Duncan P. N. Exon Smith
dexonsmith at apple.com
Mon Jun 16 17:32:54 PDT 2014
> On 2014-Jun-16, at 15:30, Björn Steinbrink <bsteinbr at gmail.com> wrote:
>
> Hi Duncan,
>
> I seem to be unable to get the review system to add you to the list of
> mail receivers. Please excuse this extra mail.
No problem -- thanks for the ping. I don't use Phabricator much, so
there's a chance my account isn't behaving correctly and I haven't even
noticed. Anyway, I prefer email.
> I had already added the
> requested comment and extra test for the order indepence last week, but
> happened to re-read the commit message today and rewrote that to be
> (hopefully) more correct / easier to understand. I also simplified the
> original testcase a bit.
LGTM with a couple of style fixups (that I did myself).
- Changed the test filenames to use hyphens.
- Slightly changed the comment you added.
Committed in r211082!
>
> Greetings
> Björn
>
> PS: I should probably mention that I will need someone to commit this
> for me as I do not have commit access.
>
> On 2014.06.16 22:16:49 +0000, Björn Steinbrink wrote:
>> Fix SROA regression causing miscompilation, fixes PR19250
>>
>> Summary:
>> r199771 accidently changed the logic that excludes integer types that
>> are not a byte width multiple from being used as the type used for a
>> newly formed partition.
>>
>> This means that we can, for example, end up with a partition that spans
>> a single byte and has two users, one load with type i1 and another one
>> with type i32. The latter is considered to be splittable and expands
>> beyond the partition.
>>
>> Now in findCommonType, i1 is assigned to Ty before the check that
>> rejects i1 because its width is not a byte width multiple. And because
>> it is the only type that spans the whole partition, TyIsCommon remains
>> true and i1 is returned as the common type.
>>
>> Since such types are not actually supported, this causes some bits to be
>> zeroed out in the load of the i32 value, because the first byte gets
>> truncated to i1 and then zero-extended.
>>
>> To fix this problem, we must make sure to check for non-byte width types
>> first and always reject them.
>>
>> Reviewers: dexonsmith
>> Cc: aprantl
>>
>> Differential Revision: http://reviews.llvm.org/D3297
>>
>> http://reviews.llvm.org/D3297
>>
>> Files:
>> lib/Transforms/Scalar/SROA.cpp
>> test/Transforms/SROA/order_independence.ll
>> test/Transforms/SROA/slicewidth.ll
>
>> Index: lib/Transforms/Scalar/SROA.cpp
>> ===================================================================
>> --- lib/Transforms/Scalar/SROA.cpp
>> +++ lib/Transforms/Scalar/SROA.cpp
>> @@ -1032,11 +1032,6 @@
>> UserTy = SI->getValueOperand()->getType();
>> }
>>
>> - if (!UserTy || (Ty && Ty != UserTy))
>> - TyIsCommon = false; // Give up on anything but an iN type.
>> - else
>> - Ty = UserTy;
>> -
>> if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
>> // If the type is larger than the partition, skip it. We only encounter
>> // this for split integer operations where we want to use the type of the
>> @@ -1051,6 +1046,13 @@
>> if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
>> ITy = UserITy;
>> }
>> +
>> + // To keep thing independent from the order of slices, Ty and TyIsCommon
>> + // must not depend on types skipped above
>> + if (!UserTy || (Ty && Ty != UserTy))
>> + TyIsCommon = false; // Give up on anything but an iN type.
>> + else
>> + Ty = UserTy;
>> }
>>
>> return TyIsCommon ? Ty : ITy;
>> Index: test/Transforms/SROA/order_independence.ll
>> ===================================================================
>> --- /dev/null
>> +++ test/Transforms/SROA/order_independence.ll
>> @@ -0,0 +1,38 @@
>> +; RUN: opt < %s -sroa -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
>> +
>> +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
>> +
>> +; Check that the chosen type for a split is independent from the order of
>> +; slices even in case of types that are skipped because their width is not a
>> +; byte width multiple
>> +
>> +define void @skipped_inttype_first({ i16*, i32 }*) {
>> +; CHECK-LABEL: @skipped_inttype_first
>> +; CHECK: alloca i8*
>> + %arg = alloca { i16*, i32 }, align 8
>> + %2 = bitcast { i16*, i32 }* %0 to i8*
>> + %3 = bitcast { i16*, i32 }* %arg to i8*
>> + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
>> + %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
>> + %pb0 = bitcast i16** %b to i63*
>> + %b0 = load i63* %pb0
>> + %pb1 = bitcast i16** %b to i8**
>> + %b1 = load i8** %pb1
>> + ret void
>> +}
>> +
>> +define void @skipped_inttype_last({ i16*, i32 }*) {
>> +; CHECK-LABEL: @skipped_inttype_last
>> +; CHECK: alloca i8*
>> + %arg = alloca { i16*, i32 }, align 8
>> + %2 = bitcast { i16*, i32 }* %0 to i8*
>> + %3 = bitcast { i16*, i32 }* %arg to i8*
>> + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
>> + %b = getelementptr inbounds { i16*, i32 }* %arg, i64 0, i32 0
>> + %pb1 = bitcast i16** %b to i8**
>> + %b1 = load i8** %pb1
>> + %pb0 = bitcast i16** %b to i63*
>> + %b0 = load i63* %pb0
>> + ret void
>> +}
>> Index: test/Transforms/SROA/slicewidth.ll
>> ===================================================================
>> --- /dev/null
>> +++ test/Transforms/SROA/slicewidth.ll
>> @@ -0,0 +1,25 @@
>> +; RUN: opt < %s -sroa -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
>> +
>> +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
>> +
>> +define void @no_split_on_non_byte_width(i32) {
>> +; This tests that allocas are not split into slices that are not byte width multiple
>> + %arg = alloca i32 , align 8
>> + store i32 %0, i32* %arg
>> + br label %load_i32
>> +
>> +load_i32:
>> +; CHECK-LABEL: load_i32:
>> +; CHECK-NOT: bitcast {{.*}} to i1
>> +; CHECK-NOT: zext i1
>> + %r0 = load i32* %arg
>> + br label %load_i1
>> +
>> +load_i1:
>> +; CHECK-LABEL: load_i1:
>> +; CHECK: bitcast {{.*}} to i1
>> + %p1 = bitcast i32* %arg to i1*
>> + %t1 = load i1* %p1
>> + ret void
>> +}
More information about the llvm-commits
mailing list