[PATCH] D98714: [SLP] Add insertelement instructions to vectorizable tree

Jordan Rupprecht via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 13 19:46:29 PDT 2021


rupprecht added a comment.

This patch introduces an assertion failure that we believe may be contributing to a miscompile (along with some other recent SLP patches -- this patch fixes the reduced case in http://llvm.org/PR50323, but doesn't fix the full case it was reduced from):

  $ opt reduced.ll -disable-output -O1 -slp-vectorizer  # See below for reduced.ll
  opt: /home/rupprecht/src/llvm-project/llvm/lib/IR/Type.cpp:648: static llvm::FixedVectorType *llvm::FixedVectorType::get(llvm::Type *, unsigned int): Assertion `isValidElementType(ElementType) && "Element type of a VectorType must " "be an integer, floating point, or " "pointer type."' failed.
  PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace.
  Stack dump:
  0.      Program arguments: /home/rupprecht/dev/opt reduced.ll -disable-output -O1 -slp-vectorizer
  ...
  #10 0x000000000697c8f8 llvm::FixedVectorType::get(llvm::Type*, unsigned int) /home/rupprecht/src/llvm-project/llvm/lib/IR/Type.cpp:650:36
  #11 0x0000000007752a5e llvm::slpvectorizer::BoUpSLP::getSpillCost() const /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4321:21
  #12 0x0000000007753060 llvm::slpvectorizer::BoUpSLP::getTreeCost() /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4384:31
  #13 0x000000000775f888 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:6740:32
  #14 0x0000000007760dab llvm::SLPVectorizerPass::vectorizeInsertElementInst(llvm::InsertElementInst*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:7844:3
  #15 0x0000000007760f84 llvm::SLPVectorizerPass::vectorizeSimpleInstructions(llvm::SmallVectorImpl<llvm::Instruction*>&, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, bool) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:7858:21
  #16 0x000000000775d5d8 llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:8019:21
  #17 0x000000000775c6d3 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:6395:16
  #18 0x000000000775c26f llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:6332:8
  ...
  $ cat reduced.ll
  ; ModuleID = 'reduced.ll'
  source_filename = "repro.cc"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  %struct.widget = type { %struct.baz }
  %struct.baz = type { double, double }
  %struct.snork = type { <2 x double> }
  %struct.spam = type { %struct.snork }
  
  $_ZN1dC2Edd = comdat any
  
  $_ZN1k1lE1d = comdat any
  
  $_ZN1d1hES_ = comdat any
  
  $_ZN1d1fEv = comdat any
  
  $_ZN1d1eEv = comdat any
  
  @global = external global %struct.widget, align 8
  
  define <2 x double> @zot(%struct.widget* %arg, %struct.baz* %arg1) align 2 {
  bb:
    %tmp = alloca %struct.snork, align 16
    %tmp2 = alloca %struct.widget*, align 8
    %tmp3 = alloca %struct.baz*, align 8
    store %struct.widget* %arg, %struct.widget** %tmp2, align 8, !tbaa !0
    store %struct.baz* %arg1, %struct.baz** %tmp3, align 8, !tbaa !0
    %tmp4 = load %struct.widget*, %struct.widget** %tmp2, align 8
    %tmp5 = load %struct.baz*, %struct.baz** %tmp3, align 8, !tbaa !0
    %tmp6 = getelementptr inbounds %struct.baz, %struct.baz* %tmp5, i32 0, i32 1
    %tmp7 = load double, double* %tmp6, align 8, !tbaa !4
    %tmp8 = getelementptr inbounds %struct.widget, %struct.widget* %tmp4, i32 0, i32 0
    %tmp9 = getelementptr inbounds %struct.baz, %struct.baz* %tmp8, i32 0, i32 1
    %tmp10 = load double, double* %tmp9, align 8, !tbaa !7
    %tmp11 = fsub double %tmp7, %tmp10
    %tmp12 = load %struct.baz*, %struct.baz** %tmp3, align 8, !tbaa !0
    %tmp13 = getelementptr inbounds %struct.baz, %struct.baz* %tmp12, i32 0, i32 0
    %tmp14 = load double, double* %tmp13, align 8, !tbaa !9
    %tmp15 = getelementptr inbounds %struct.widget, %struct.widget* %tmp4, i32 0, i32 0
    %tmp16 = getelementptr inbounds %struct.baz, %struct.baz* %tmp15, i32 0, i32 0
    %tmp17 = load double, double* %tmp16, align 8, !tbaa !10
    %tmp18 = fsub double %tmp14, %tmp17
    call void @wombat(%struct.snork* %tmp, double %tmp11, double %tmp18)
    %tmp19 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
    %tmp20 = load <2 x double>, <2 x double>* %tmp19, align 16
    ret <2 x double> %tmp20
  }
  
  define linkonce_odr void @wombat(%struct.snork* %arg, double %arg1, double %arg2) unnamed_addr comdat($_ZN1dC2Edd) align 2 {
  bb:
    %tmp = alloca %struct.snork*, align 8
    %tmp3 = alloca double, align 8
    %tmp4 = alloca double, align 8
    store %struct.snork* %arg, %struct.snork** %tmp, align 8, !tbaa !0
    store double %arg1, double* %tmp3, align 8, !tbaa !11
    store double %arg2, double* %tmp4, align 8, !tbaa !11
    %tmp5 = load %struct.snork*, %struct.snork** %tmp, align 8
    %tmp6 = getelementptr inbounds %struct.snork, %struct.snork* %tmp5, i32 0, i32 0
    %tmp7 = load double, double* %tmp3, align 8, !tbaa !11
    %tmp8 = insertelement <2 x double> undef, double %tmp7, i32 0
    %tmp9 = load double, double* %tmp4, align 8, !tbaa !11
    %tmp10 = insertelement <2 x double> %tmp8, double %tmp9, i32 1
    store <2 x double> %tmp10, <2 x double>* %tmp6, align 16, !tbaa !12
    ret void
  }
  
  define double @wombat.1() {
  bb:
    %tmp = alloca %struct.widget, align 8
    %tmp1 = alloca %struct.spam, align 16
    %tmp2 = alloca %struct.snork, align 16
    %tmp3 = alloca %struct.baz, align 8
    %tmp4 = bitcast %struct.widget* %tmp to i8*
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp4, i8* bitcast (%struct.widget* @global to i8*), i64 16, i1 false), !tbaa.struct !13
    %tmp5 = bitcast %struct.spam* %tmp1 to i8*
    call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 16, i1 false)
    call void @quux()
    %tmp6 = getelementptr inbounds %struct.baz, %struct.baz* %tmp3, i32 0, i32 0
    store double 0.000000e+00, double* %tmp6, align 8, !tbaa !9
    %tmp7 = getelementptr inbounds %struct.baz, %struct.baz* %tmp3, i32 0, i32 1
    store double 0.000000e+00, double* %tmp7, align 8, !tbaa !4
    %tmp8 = call <2 x double> @zot(%struct.widget* %tmp, %struct.baz* %tmp3)
    %tmp9 = getelementptr inbounds %struct.snork, %struct.snork* %tmp2, i32 0, i32 0
    store <2 x double> %tmp8, <2 x double>* %tmp9, align 16
    %tmp10 = getelementptr inbounds %struct.snork, %struct.snork* %tmp2, i32 0, i32 0
    %tmp11 = load <2 x double>, <2 x double>* %tmp10, align 16
    %tmp12 = call double @wobble(%struct.spam* %tmp1, <2 x double> %tmp11)
    ret double %tmp12
  }
  
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0
  
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
  
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1
  
  declare void @quux() unnamed_addr align 2
  
  define linkonce_odr double @wobble(%struct.spam* %arg, <2 x double> %arg1) comdat($_ZN1k1lE1d) align 2 {
  bb:
    %tmp = alloca %struct.snork, align 16
    %tmp2 = alloca %struct.spam*, align 8
    %tmp3 = alloca %struct.snork, align 16
    %tmp4 = alloca %struct.snork, align 16
    %tmp5 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
    store <2 x double> %arg1, <2 x double>* %tmp5, align 16
    store %struct.spam* %arg, %struct.spam** %tmp2, align 8, !tbaa !0
    %tmp6 = load %struct.spam*, %struct.spam** %tmp2, align 8
    %tmp7 = getelementptr inbounds %struct.spam, %struct.spam* %tmp6, i32 0, i32 0
    %tmp8 = bitcast %struct.snork* %tmp3 to i8*
    %tmp9 = bitcast %struct.snork* %tmp7 to i8*
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp8, i8* %tmp9, i64 16, i1 false), !tbaa.struct !14
    %tmp10 = bitcast %struct.snork* %tmp4 to i8*
    %tmp11 = bitcast %struct.snork* %tmp to i8*
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 16, i1 false), !tbaa.struct !14
    %tmp12 = getelementptr inbounds %struct.snork, %struct.snork* %tmp3, i32 0, i32 0
    %tmp13 = load <2 x double>, <2 x double>* %tmp12, align 16
    %tmp14 = getelementptr inbounds %struct.snork, %struct.snork* %tmp4, i32 0, i32 0
    %tmp15 = load <2 x double>, <2 x double>* %tmp14, align 16
    %tmp16 = call double @eggs(<2 x double> %tmp13, <2 x double> %tmp15)
    ret double %tmp16
  }
  
  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0
  
  define linkonce_odr double @eggs(<2 x double> %arg, <2 x double> %arg1) align 2 {
  bb:
    %tmp = alloca %struct.snork, align 16
    %tmp2 = alloca %struct.snork, align 16
    %tmp3 = alloca %struct.snork, align 16
    %tmp4 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
    store <2 x double> %arg, <2 x double>* %tmp4, align 16
    %tmp5 = getelementptr inbounds %struct.snork, %struct.snork* %tmp2, i32 0, i32 0
    store <2 x double> %arg1, <2 x double>* %tmp5, align 16
    %tmp6 = bitcast %struct.snork* %tmp3 to i8*
    %tmp7 = bitcast %struct.snork* %tmp2 to i8*
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp6, i8* %tmp7, i64 16, i1 false), !tbaa.struct !14
    %tmp8 = getelementptr inbounds %struct.snork, %struct.snork* %tmp3, i32 0, i32 0
    %tmp9 = load <2 x double>, <2 x double>* %tmp8, align 16
    %tmp10 = call double @wobble.2(%struct.snork* %tmp, <2 x double> %tmp9)
    ret double %tmp10
  }
  
  define linkonce_odr double @wobble.2(%struct.snork* %arg, <2 x double> %arg1) comdat($_ZN1d1hES_) align 2 {
  bb:
    %tmp = alloca %struct.snork, align 16
    %tmp2 = alloca %struct.snork*, align 8
    %tmp3 = alloca %struct.snork, align 16
    %tmp4 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
    store <2 x double> %arg1, <2 x double>* %tmp4, align 16
    store %struct.snork* %arg, %struct.snork** %tmp2, align 8, !tbaa !0
    %tmp5 = load %struct.snork*, %struct.snork** %tmp2, align 8
    %tmp6 = call double @quux.3(%struct.snork* %tmp)
    %tmp7 = call double @zot.4(%struct.snork* %tmp)
    call void @wombat(%struct.snork* %tmp3, double %tmp6, double %tmp7)
    %tmp8 = getelementptr inbounds %struct.snork, %struct.snork* %tmp5, i32 0, i32 0
    %tmp9 = load <2 x double>, <2 x double>* %tmp8, align 16, !tbaa !12
    %tmp10 = getelementptr inbounds %struct.snork, %struct.snork* %tmp3, i32 0, i32 0
    %tmp11 = load <2 x double>, <2 x double>* %tmp10, align 16, !tbaa !12
    %tmp12 = fmul <2 x double> %tmp11, %tmp9
    store <2 x double> %tmp12, <2 x double>* %tmp10, align 16, !tbaa !12
    %tmp13 = call double @zot.4(%struct.snork* %tmp3)
    %tmp14 = call double @quux.3(%struct.snork* %tmp3)
    %tmp15 = fsub double %tmp13, %tmp14
    ret double %tmp15
  }
  
  define linkonce_odr double @quux.3(%struct.snork* %arg) comdat($_ZN1d1fEv) align 2 {
  bb:
    %tmp = alloca %struct.snork*, align 8
    store %struct.snork* %arg, %struct.snork** %tmp, align 8, !tbaa !0
    %tmp1 = load %struct.snork*, %struct.snork** %tmp, align 8
    %tmp2 = getelementptr inbounds %struct.snork, %struct.snork* %tmp1, i32 0, i32 0
    %tmp3 = load <2 x double>, <2 x double>* %tmp2, align 16, !tbaa !12
    %tmp4 = extractelement <2 x double> %tmp3, i32 1
    ret double %tmp4
  }
  
  define linkonce_odr double @zot.4(%struct.snork* %arg) comdat($_ZN1d1eEv) align 2 {
  bb:
    %tmp = alloca %struct.snork*, align 8
    store %struct.snork* %arg, %struct.snork** %tmp, align 8, !tbaa !0
    %tmp1 = load %struct.snork*, %struct.snork** %tmp, align 8
    %tmp2 = getelementptr inbounds %struct.snork, %struct.snork* %tmp1, i32 0, i32 0
    %tmp3 = load <2 x double>, <2 x double>* %tmp2, align 16, !tbaa !12
    %tmp4 = extractelement <2 x double> %tmp3, i32 0
    ret double %tmp4
  }
  
  attributes #0 = { argmemonly nofree nosync nounwind willreturn }
  attributes #1 = { argmemonly nofree nosync nounwind willreturn writeonly }
  
  !0 = !{!1, !1, i64 0}
  !1 = !{!"any pointer", !2, i64 0}
  !2 = !{!"omnipotent char", !3, i64 0}
  !3 = !{!"Simple C++ TBAA"}
  !4 = !{!5, !6, i64 8}
  !5 = !{!"_ZTS1a", !6, i64 0, !6, i64 8}
  !6 = !{!"double", !2, i64 0}
  !7 = !{!8, !6, i64 8}
  !8 = !{!"_ZTS1p", !5, i64 0}
  !9 = !{!5, !6, i64 0}
  !10 = !{!8, !6, i64 0}
  !11 = !{!6, !6, i64 0}
  !12 = !{!2, !2, i64 0}
  !13 = !{i64 0, i64 8, !11, i64 8, i64 8, !11}
  !14 = !{i64 0, i64 16, !12}

(Sorry for the length -- this is as far as llvm-reduce would take it.)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98714/new/

https://reviews.llvm.org/D98714



More information about the llvm-commits mailing list