[PATCH] D106613: Bad SLPVectorization shufflevector replacement, resulting in write to wrong memory location
Alexey Bataev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 15 08:38:20 PDT 2021
ABataev added a comment.
In D106613#2970895 <https://reviews.llvm.org/D106613#2970895>, @vtjnash wrote:
> The full reproducer is in the commit message on phabricator for this review, though I don't know how to link to the text of a commit specifically. Here's the text of that:
>
> With `opt -mcpu=haswell -slp-vectorizer`, we see that it might currently produce:
>
> %10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4>
> %11 = bitcast <2 x {}***> %10 to <2 x i64*>
> ...
> %27 = extractelement <2 x i64*> %11, i32 0
> %28 = bitcast i64* %27 to <2 x i64>*
> store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2
>
> Which is an out-of-bounds store (the extractelement got offset 10
> instead of offset 4 as intended). With the fix, we correctly generate
> extractelement for i32 1 and generate correct code.
>
> ; ModuleID = 'rand3.ll'
> source_filename = "rand"
> target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128-ni:10:11:12:13"
> target triple = "i686-unknown-linux-gnu"
>
> @llvm.compiler.used = appending global [3 x i8*] [i8* bitcast (void ({} addrspace(10)*)* @jl_gc_queue_root to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32, i32)* @jl_gc_pool_alloc to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32)* @jl_gc_big_alloc to i8*)], section "llvm.metadata"
>
> ; Function Attrs: sspstrong
> define void @julia_rand_5(i64* noalias nocapture sret(i64) %0) #0 {
> top:
> %1 = call {}*** @julia.get_pgcstack()
> %2 = getelementptr {}**, {}*** %1, i32 4
> %3 = bitcast {}*** %2 to i64*
> %4 = load i64, i64* %3, align 4, !tbaa !2
> %5 = getelementptr {}**, {}*** %1, i32 6
> %6 = bitcast {}*** %5 to i64*
> %7 = load i64, i64* %6, align 4, !tbaa !2
> %8 = getelementptr {}**, {}*** %1, i32 8
> %9 = bitcast {}*** %8 to i64*
> %10 = load i64, i64* %9, align 4, !tbaa !2
> %11 = getelementptr {}**, {}*** %1, i32 10
> %12 = bitcast {}*** %11 to i64*
> %13 = load i64, i64* %12, align 4, !tbaa !2
> %14 = add i64 %13, %4
> %15 = call i64 @llvm.fshl.i64(i64 %14, i64 %14, i64 23)
> %16 = shl i64 %7, 17
> %17 = xor i64 %10, %4
> %18 = xor i64 %13, %7
> %19 = xor i64 %17, %7
> %20 = xor i64 %18, %4
> %21 = xor i64 %17, %16
> %22 = call i64 @llvm.fshl.i64(i64 %18, i64 %18, i64 45)
> store i64 %20, i64* %3, align 4, !tbaa !2
> store i64 %19, i64* %6, align 4, !tbaa !2
> store i64 %21, i64* %9, align 4, !tbaa !2
> store i64 %22, i64* %12, align 4, !tbaa !2
> store i64 %15, i64* %0, align 4
> ret void
> }
>
> define nonnull {} addrspace(10)* @jfptr_rand_6({} addrspace(10)* %0, {} addrspace(10)** %1, i32 %2) #1 {
> top:
> %3 = call {}*** @julia.get_pgcstack()
> %4 = alloca i64, align 8
> call void @julia_rand_5(i64* noalias nocapture nonnull sret(i64) %4) #5
> %5 = load i64, i64* %4, align 8, !tbaa !7
> %6 = call nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext %5)
> ret {} addrspace(10)* %6
> }
>
> declare {}*** @julia.get_pgcstack()
>
> declare nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext)
>
> ; Function Attrs: inaccessiblemem_or_argmemonly
> declare void @jl_gc_queue_root({} addrspace(10)*) #2
>
> ; Function Attrs: allocsize(1)
> declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #3
>
> ; Function Attrs: allocsize(1)
> declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*, i32) #3
>
> ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
> declare i64 @llvm.fshl.i64(i64, i64, i64) #4
>
> attributes #0 = { sspstrong "probe-stack"="inline-asm" }
> attributes #1 = { "probe-stack"="inline-asm" "thunk" }
> attributes #2 = { inaccessiblemem_or_argmemonly }
> attributes #3 = { allocsize(1) }
> attributes #4 = { nofree nosync nounwind readnone speculatable willreturn }
> attributes #5 = { "probe-stack"="inline-asm" }
>
> !llvm.module.flags = !{!0, !1}
>
> !0 = !{i32 2, !"Dwarf Version", i32 4}
> !1 = !{i32 2, !"Debug Info Version", i32 3}
> !2 = !{!3, !3, i64 0}
> !3 = !{!"jtbaa_value", !4, i64 0}
> !4 = !{!"jtbaa_data", !5, i64 0}
> !5 = !{!"jtbaa", !6, i64 0}
> !6 = !{!"jtbaa"}
> !7 = !{!8, !8, i64 0}
> !8 = !{!"jtbaa_stack", !5, i64 0}
Unable to reproduce. This is what I get with trunk currently:
opt -S -mcpu=haswell -slp-vectorizer repro.ll -o -
; ModuleID = 'repro.ll'
source_filename = "rand"
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128-ni:10:11:12:13"
target triple = "i686-unknown-linux-gnu"
@llvm.compiler.used = appending global [3 x i8*] [i8* bitcast (void ({} addrspace(10)*)* @jl_gc_queue_root to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32, i32)* @jl_gc_pool_alloc to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32)* @jl
_gc_big_alloc to i8*)], section "llvm.metadata"
; Function Attrs: sspstrong
define void @julia_rand_5(i64* noalias nocapture sret(i64) %0) #0 {
top:
%1 = call {}*** @julia.get_pgcstack()
%2 = getelementptr {}**, {}*** %1, i32 4
%3 = bitcast {}*** %2 to i64*
%4 = load i64, i64* %3, align 4, !tbaa !2
%5 = getelementptr {}**, {}*** %1, i32 6
%6 = bitcast {}*** %5 to i64*
%7 = getelementptr {}**, {}*** %1, i32 8
%8 = bitcast {}*** %7 to i64*
%9 = bitcast i64* %6 to <2 x i64>*
%10 = load <2 x i64>, <2 x i64>* %9, align 4, !tbaa !2
%11 = getelementptr {}**, {}*** %1, i32 10
%12 = bitcast {}*** %11 to i64*
%13 = load i64, i64* %12, align 4, !tbaa !2
%14 = add i64 %13, %4
%15 = call i64 @llvm.fshl.i64(i64 %14, i64 %14, i64 23)
%16 = extractelement <2 x i64> %10, i32 0
%17 = shl i64 %16, 17
%18 = insertelement <2 x i64> poison, i64 %13, i32 0
%19 = insertelement <2 x i64> %18, i64 %4, i32 1
%20 = xor <2 x i64> %19, %10
%21 = insertelement <2 x i64> poison, i64 %4, i32 0
%22 = insertelement <2 x i64> %21, i64 %16, i32 1
%23 = xor <2 x i64> %20, %22
%24 = extractelement <2 x i64> %20, i32 1
%25 = xor i64 %24, %17
%26 = extractelement <2 x i64> %20, i32 0
%27 = call i64 @llvm.fshl.i64(i64 %26, i64 %26, i64 45)
%28 = bitcast i64* %3 to <2 x i64>*
store <2 x i64> %23, <2 x i64>* %28, align 4, !tbaa !2
store i64 %25, i64* %8, align 4, !tbaa !2
store i64 %27, i64* %12, align 4, !tbaa !2
store i64 %15, i64* %0, align 4
ret void
}
define nonnull {} addrspace(10)* @jfptr_rand_6({} addrspace(10)* %0, {} addrspace(10)** %1, i32 %2) #1 {
top:
%3 = call {}*** @julia.get_pgcstack()
%4 = alloca i64, align 8
call void @julia_rand_5(i64* noalias nocapture nonnull sret(i64) %4) #6
%5 = load i64, i64* %4, align 8, !tbaa !7
%6 = call nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext %5)
ret {} addrspace(10)* %6
}
declare {}*** @julia.get_pgcstack() #2
declare nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext) #2
; Function Attrs: inaccessiblemem_or_argmemonly
declare void @jl_gc_queue_root({} addrspace(10)*) #3
; Function Attrs: allocsize(1)
declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #4
; Function Attrs: allocsize(1)
declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*, i32) #4
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare i64 @llvm.fshl.i64(i64, i64, i64) #5
attributes #0 = { sspstrong "probe-stack"="inline-asm" "target-cpu"="haswell" }
attributes #1 = { "probe-stack"="inline-asm" "target-cpu"="haswell" "thunk" }
attributes #2 = { "target-cpu"="haswell" }
attributes #3 = { inaccessiblemem_or_argmemonly "target-cpu"="haswell" }
attributes #4 = { allocsize(1) "target-cpu"="haswell" }
attributes #5 = { nofree nosync nounwind readnone speculatable willreturn "target-cpu"="haswell" }
attributes #6 = { "probe-stack"="inline-asm" }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{!3, !3, i64 0}
!3 = !{!"jtbaa_value", !4, i64 0}
!4 = !{!"jtbaa_data", !5, i64 0}
!5 = !{!"jtbaa", !6, i64 0}
!6 = !{!"jtbaa"}
!7 = !{!8, !8, i64 0}
!8 = !{!"jtbaa_stack", !5, i64 0}
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D106613/new/
https://reviews.llvm.org/D106613
More information about the llvm-commits
mailing list