[PATCH] D106613: Bad SLPVectorization shufflevector replacement, resulting in write to wrong memory location

Alexey Bataev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 15 08:38:20 PDT 2021


ABataev added a comment.

In D106613#2970895 <https://reviews.llvm.org/D106613#2970895>, @vtjnash wrote:

> The full reproducer is in the commit message on phabricator for this review, though I don't know how to link to the text of a commit specifically. Here's the text of that:
>
> With `opt -mcpu=haswell -slp-vectorizer`, we see that it might currently produce:
>
>   %10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4>
>   %11 = bitcast <2 x {}***> %10 to <2 x i64*>
>   ...
>   %27 = extractelement <2 x i64*> %11, i32 0
>   %28 = bitcast i64* %27 to <2 x i64>*
>   store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2
>
> Which is an out-of-bounds store (the extractelement got offset 10
> instead of offset 4 as intended). With the fix, we correctly generate
> extractelement for i32 1 and generate correct code.
>
>   ; ModuleID = 'rand3.ll'
>   source_filename = "rand"
>   target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128-ni:10:11:12:13"
>   target triple = "i686-unknown-linux-gnu"
>   
>   @llvm.compiler.used = appending global [3 x i8*] [i8* bitcast (void ({} addrspace(10)*)* @jl_gc_queue_root to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32, i32)* @jl_gc_pool_alloc to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32)* @jl_gc_big_alloc to i8*)], section "llvm.metadata"
>   
>   ; Function Attrs: sspstrong
>   define void @julia_rand_5(i64* noalias nocapture sret(i64) %0) #0 {
>   top:
>   %1 = call {}*** @julia.get_pgcstack()
>   %2 = getelementptr {}**, {}*** %1, i32 4
>   %3 = bitcast {}*** %2 to i64*
>   %4 = load i64, i64* %3, align 4, !tbaa !2
>   %5 = getelementptr {}**, {}*** %1, i32 6
>   %6 = bitcast {}*** %5 to i64*
>   %7 = load i64, i64* %6, align 4, !tbaa !2
>   %8 = getelementptr {}**, {}*** %1, i32 8
>   %9 = bitcast {}*** %8 to i64*
>   %10 = load i64, i64* %9, align 4, !tbaa !2
>   %11 = getelementptr {}**, {}*** %1, i32 10
>   %12 = bitcast {}*** %11 to i64*
>   %13 = load i64, i64* %12, align 4, !tbaa !2
>   %14 = add i64 %13, %4
>   %15 = call i64 @llvm.fshl.i64(i64 %14, i64 %14, i64 23)
>   %16 = shl i64 %7, 17
>   %17 = xor i64 %10, %4
>   %18 = xor i64 %13, %7
>   %19 = xor i64 %17, %7
>   %20 = xor i64 %18, %4
>   %21 = xor i64 %17, %16
>   %22 = call i64 @llvm.fshl.i64(i64 %18, i64 %18, i64 45)
>   store i64 %20, i64* %3, align 4, !tbaa !2
>   store i64 %19, i64* %6, align 4, !tbaa !2
>   store i64 %21, i64* %9, align 4, !tbaa !2
>   store i64 %22, i64* %12, align 4, !tbaa !2
>   store i64 %15, i64* %0, align 4
>   ret void
>   }
>   
>   define nonnull {} addrspace(10)* @jfptr_rand_6({} addrspace(10)* %0, {} addrspace(10)** %1, i32 %2) #1 {
>   top:
>   %3 = call {}*** @julia.get_pgcstack()
>   %4 = alloca i64, align 8
>   call void @julia_rand_5(i64* noalias nocapture nonnull sret(i64) %4) #5
>   %5 = load i64, i64* %4, align 8, !tbaa !7
>   %6 = call nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext %5)
>   ret {} addrspace(10)* %6
>   }
>   
>   declare {}*** @julia.get_pgcstack()
>   
>   declare nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext)
>   
>   ; Function Attrs: inaccessiblemem_or_argmemonly
>   declare void @jl_gc_queue_root({} addrspace(10)*) #2
>   
>   ; Function Attrs: allocsize(1)
>   declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #3
>   
>   ; Function Attrs: allocsize(1)
>   declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*, i32) #3
>   
>   ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
>   declare i64 @llvm.fshl.i64(i64, i64, i64) #4
>   
>   attributes #0 = { sspstrong "probe-stack"="inline-asm" }
>   attributes #1 = { "probe-stack"="inline-asm" "thunk" }
>   attributes #2 = { inaccessiblemem_or_argmemonly }
>   attributes #3 = { allocsize(1) }
>   attributes #4 = { nofree nosync nounwind readnone speculatable willreturn }
>   attributes #5 = { "probe-stack"="inline-asm" }
>   
>   !llvm.module.flags = !{!0, !1}
>   
>   !0 = !{i32 2, !"Dwarf Version", i32 4}
>   !1 = !{i32 2, !"Debug Info Version", i32 3}
>   !2 = !{!3, !3, i64 0}
>   !3 = !{!"jtbaa_value", !4, i64 0}
>   !4 = !{!"jtbaa_data", !5, i64 0}
>   !5 = !{!"jtbaa", !6, i64 0}
>   !6 = !{!"jtbaa"}
>   !7 = !{!8, !8, i64 0}
>   !8 = !{!"jtbaa_stack", !5, i64 0}

I am unable to reproduce this. This is what I currently get with trunk:

  opt -S -mcpu=haswell -slp-vectorizer repro.ll -o -
  ; ModuleID = 'repro.ll'
  source_filename = "rand"
  target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128-ni:10:11:12:13"
  target triple = "i686-unknown-linux-gnu"
  
  @llvm.compiler.used = appending global [3 x i8*] [i8* bitcast (void ({} addrspace(10)*)* @jl_gc_queue_root to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32, i32)* @jl_gc_pool_alloc to i8*), i8* bitcast ({} addrspace(10)* (i8*, i32)* @jl_gc_big_alloc to i8*)], section "llvm.metadata"
  
  ; Function Attrs: sspstrong
  define void @julia_rand_5(i64* noalias nocapture sret(i64) %0) #0 {
  top:
    %1 = call {}*** @julia.get_pgcstack()
    %2 = getelementptr {}**, {}*** %1, i32 4
    %3 = bitcast {}*** %2 to i64*
    %4 = load i64, i64* %3, align 4, !tbaa !2
    %5 = getelementptr {}**, {}*** %1, i32 6
    %6 = bitcast {}*** %5 to i64*
    %7 = getelementptr {}**, {}*** %1, i32 8
    %8 = bitcast {}*** %7 to i64*
    %9 = bitcast i64* %6 to <2 x i64>*
    %10 = load <2 x i64>, <2 x i64>* %9, align 4, !tbaa !2
    %11 = getelementptr {}**, {}*** %1, i32 10
    %12 = bitcast {}*** %11 to i64*
    %13 = load i64, i64* %12, align 4, !tbaa !2
    %14 = add i64 %13, %4
    %15 = call i64 @llvm.fshl.i64(i64 %14, i64 %14, i64 23)
    %16 = extractelement <2 x i64> %10, i32 0
    %17 = shl i64 %16, 17
    %18 = insertelement <2 x i64> poison, i64 %13, i32 0
    %19 = insertelement <2 x i64> %18, i64 %4, i32 1
    %20 = xor <2 x i64> %19, %10
    %21 = insertelement <2 x i64> poison, i64 %4, i32 0
    %22 = insertelement <2 x i64> %21, i64 %16, i32 1
    %23 = xor <2 x i64> %20, %22
    %24 = extractelement <2 x i64> %20, i32 1
    %25 = xor i64 %24, %17
    %26 = extractelement <2 x i64> %20, i32 0
    %27 = call i64 @llvm.fshl.i64(i64 %26, i64 %26, i64 45)
    %28 = bitcast i64* %3 to <2 x i64>*
    store <2 x i64> %23, <2 x i64>* %28, align 4, !tbaa !2
    store i64 %25, i64* %8, align 4, !tbaa !2
    store i64 %27, i64* %12, align 4, !tbaa !2
    store i64 %15, i64* %0, align 4
    ret void
  }
  
  define nonnull {} addrspace(10)* @jfptr_rand_6({} addrspace(10)* %0, {} addrspace(10)** %1, i32 %2) #1 {
  top:
    %3 = call {}*** @julia.get_pgcstack()
    %4 = alloca i64, align 8
    call void @julia_rand_5(i64* noalias nocapture nonnull sret(i64) %4) #6
    %5 = load i64, i64* %4, align 8, !tbaa !7
    %6 = call nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext %5)
    ret {} addrspace(10)* %6
  }
  
  declare {}*** @julia.get_pgcstack() #2
  
  declare nonnull {} addrspace(10)* @jl_box_uint64(i64 zeroext) #2
  
  ; Function Attrs: inaccessiblemem_or_argmemonly
  declare void @jl_gc_queue_root({} addrspace(10)*) #3
  
  ; Function Attrs: allocsize(1)
  declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #4
  
  ; Function Attrs: allocsize(1)
  declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*, i32) #4
  
  ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
  declare i64 @llvm.fshl.i64(i64, i64, i64) #5
  
  attributes #0 = { sspstrong "probe-stack"="inline-asm" "target-cpu"="haswell" }
  attributes #1 = { "probe-stack"="inline-asm" "target-cpu"="haswell" "thunk" }
  attributes #2 = { "target-cpu"="haswell" }
  attributes #3 = { inaccessiblemem_or_argmemonly "target-cpu"="haswell" }
  attributes #4 = { allocsize(1) "target-cpu"="haswell" }
  attributes #5 = { nofree nosync nounwind readnone speculatable willreturn "target-cpu"="haswell" }
  attributes #6 = { "probe-stack"="inline-asm" }
  
  !llvm.module.flags = !{!0, !1}
  
  !0 = !{i32 2, !"Dwarf Version", i32 4}
  !1 = !{i32 2, !"Debug Info Version", i32 3}
  !2 = !{!3, !3, i64 0}
  !3 = !{!"jtbaa_value", !4, i64 0}
  !4 = !{!"jtbaa_data", !5, i64 0}
  !5 = !{!"jtbaa", !6, i64 0}
  !6 = !{!"jtbaa"}
  !7 = !{!8, !8, i64 0}
  !8 = !{!"jtbaa_stack", !5, i64 0}


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106613/new/

https://reviews.llvm.org/D106613



More information about the llvm-commits mailing list