[PATCH] D98714: [SLP] Add insertelement instructions to vectorizable tree
Jordan Rupprecht via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu May 13 19:46:29 PDT 2021
rupprecht added a comment.
This patch introduces an assertion error we believe may be contributing to a miscompile (along with some other recent SLP patches -- this patch fixes the reduced case in http://llvm.org/PR50323, but doesn't fix the full case it was reduced from):
$ opt reduced.ll -disable-output -O1 -slp-vectorizer # See below for reduced.ll
opt: /home/rupprecht/src/llvm-project/llvm/lib/IR/Type.cpp:648: static llvm::FixedVectorType *llvm::FixedVectorType::get(llvm::Type *, unsigned int): Assertion `isValidElementType(ElementType) && "Element type of a VectorType must " "be an integer, floating point, or " "pointer type."' failed.
PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace.
Stack dump:
0. Program arguments: /home/rupprecht/dev/opt reduced.ll -disable-output -O1 -slp-vectorizer
...
#10 0x000000000697c8f8 llvm::FixedVectorType::get(llvm::Type*, unsigned int) /home/rupprecht/src/llvm-project/llvm/lib/IR/Type.cpp:650:36
#11 0x0000000007752a5e llvm::slpvectorizer::BoUpSLP::getSpillCost() const /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4321:21
#12 0x0000000007753060 llvm::slpvectorizer::BoUpSLP::getTreeCost() /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:4384:31
#13 0x000000000775f888 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:6740:32
#14 0x0000000007760dab llvm::SLPVectorizerPass::vectorizeInsertElementInst(llvm::InsertElementInst*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:7844:3
#15 0x0000000007760f84 llvm::SLPVectorizerPass::vectorizeSimpleInstructions(llvm::SmallVectorImpl<llvm::Instruction*>&, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, bool) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:7858:21
#16 0x000000000775d5d8 llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:8019:21
#17 0x000000000775c6d3 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:6395:16
#18 0x000000000775c26f llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:6332:8
...
$ cat reduced.ll
; ModuleID = 'reduced.ll'
; Module preamble for the reproducer: target description, the aggregate types
; used by the functions below, the comdat groups named by the comdat(...)
; clauses on the linkonce_odr definitions, and one external global.
source_filename = "repro.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; %struct.widget wraps a %struct.baz, which is a pair of doubles.
%struct.widget = type { %struct.baz }
%struct.baz = type { double, double }
; %struct.spam wraps a %struct.snork, which is a single <2 x double> vector.
%struct.snork = type { <2 x double> }
%struct.spam = type { %struct.snork }
; Comdat groups; each one is referenced by exactly one definition in this file.
$_ZN1dC2Edd = comdat any
$_ZN1k1lE1d = comdat any
$_ZN1d1hES_ = comdat any
$_ZN1d1fEv = comdat any
$_ZN1d1eEv = comdat any
; External global; its only use in this file is as the memcpy source in
; @wombat.1.
@global = external global %struct.widget, align 8
; @zot: spills both pointer arguments to stack slots and reloads them, then
; computes two scalar differences between the %struct.baz pointed to by %arg1
; and the %struct.baz embedded in %arg:
;   %tmp11 = (%arg1 field 1) - (%arg field 1)
;   %tmp18 = (%arg1 field 0) - (%arg field 0)
; Both differences are handed to @wombat, which fills the local %struct.snork
; %tmp; the <2 x double> stored there is loaded back and returned.
define <2 x double> @zot(%struct.widget* %arg, %struct.baz* %arg1) align 2 {
bb:
%tmp = alloca %struct.snork, align 16
%tmp2 = alloca %struct.widget*, align 8
%tmp3 = alloca %struct.baz*, align 8
store %struct.widget* %arg, %struct.widget** %tmp2, align 8, !tbaa !0
store %struct.baz* %arg1, %struct.baz** %tmp3, align 8, !tbaa !0
%tmp4 = load %struct.widget*, %struct.widget** %tmp2, align 8
%tmp5 = load %struct.baz*, %struct.baz** %tmp3, align 8, !tbaa !0
; Field 1 difference.
%tmp6 = getelementptr inbounds %struct.baz, %struct.baz* %tmp5, i32 0, i32 1
%tmp7 = load double, double* %tmp6, align 8, !tbaa !4
%tmp8 = getelementptr inbounds %struct.widget, %struct.widget* %tmp4, i32 0, i32 0
%tmp9 = getelementptr inbounds %struct.baz, %struct.baz* %tmp8, i32 0, i32 1
%tmp10 = load double, double* %tmp9, align 8, !tbaa !7
%tmp11 = fsub double %tmp7, %tmp10
; Field 0 difference (the %arg1 pointer is reloaded from its stack slot).
%tmp12 = load %struct.baz*, %struct.baz** %tmp3, align 8, !tbaa !0
%tmp13 = getelementptr inbounds %struct.baz, %struct.baz* %tmp12, i32 0, i32 0
%tmp14 = load double, double* %tmp13, align 8, !tbaa !9
%tmp15 = getelementptr inbounds %struct.widget, %struct.widget* %tmp4, i32 0, i32 0
%tmp16 = getelementptr inbounds %struct.baz, %struct.baz* %tmp15, i32 0, i32 0
%tmp17 = load double, double* %tmp16, align 8, !tbaa !10
%tmp18 = fsub double %tmp14, %tmp17
; Pack the two scalars into %tmp via @wombat, then return the packed vector.
call void @wombat(%struct.snork* %tmp, double %tmp11, double %tmp18)
%tmp19 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
%tmp20 = load <2 x double>, <2 x double>* %tmp19, align 16
ret <2 x double> %tmp20
}
; @wombat: round-trips its three arguments through stack slots, then builds a
; <2 x double> with %arg1 in lane 0 and %arg2 in lane 1 using two
; insertelement instructions and stores it to field 0 of *%arg.
; NOTE(review): this is the only insertelement chain in the file, so it is
; presumably what reaches vectorizeInsertElementInst in the reported backtrace
; once inlined into its callers -- not confirmed from the report itself.
define linkonce_odr void @wombat(%struct.snork* %arg, double %arg1, double %arg2) unnamed_addr comdat($_ZN1dC2Edd) align 2 {
bb:
%tmp = alloca %struct.snork*, align 8
%tmp3 = alloca double, align 8
%tmp4 = alloca double, align 8
store %struct.snork* %arg, %struct.snork** %tmp, align 8, !tbaa !0
store double %arg1, double* %tmp3, align 8, !tbaa !11
store double %arg2, double* %tmp4, align 8, !tbaa !11
%tmp5 = load %struct.snork*, %struct.snork** %tmp, align 8
%tmp6 = getelementptr inbounds %struct.snork, %struct.snork* %tmp5, i32 0, i32 0
; Lane 0 <- %arg1, lane 1 <- %arg2.
%tmp7 = load double, double* %tmp3, align 8, !tbaa !11
%tmp8 = insertelement <2 x double> undef, double %tmp7, i32 0
%tmp9 = load double, double* %tmp4, align 8, !tbaa !11
%tmp10 = insertelement <2 x double> %tmp8, double %tmp9, i32 1
store <2 x double> %tmp10, <2 x double>* %tmp6, align 16, !tbaa !12
ret void
}
; @wombat.1: the only definition in this file without linkonce_odr linkage.
; It sets up the locals and drives the call chain:
;   1. copies 16 bytes from @global into the local %struct.widget %tmp,
;   2. zero-fills 16 bytes of the local %struct.spam %tmp1,
;   3. calls the external @quux,
;   4. stores 0.0 into both fields of the local %struct.baz %tmp3,
;   5. calls @zot(%tmp, %tmp3) and round-trips the returned vector through
;      the local %struct.snork %tmp2,
;   6. returns the result of @wobble(%tmp1, <reloaded vector>).
define double @wombat.1() {
bb:
%tmp = alloca %struct.widget, align 8
%tmp1 = alloca %struct.spam, align 16
%tmp2 = alloca %struct.snork, align 16
%tmp3 = alloca %struct.baz, align 8
%tmp4 = bitcast %struct.widget* %tmp to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp4, i8* bitcast (%struct.widget* @global to i8*), i64 16, i1 false), !tbaa.struct !13
%tmp5 = bitcast %struct.spam* %tmp1 to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 16, i1 false)
call void @quux()
%tmp6 = getelementptr inbounds %struct.baz, %struct.baz* %tmp3, i32 0, i32 0
store double 0.000000e+00, double* %tmp6, align 8, !tbaa !9
%tmp7 = getelementptr inbounds %struct.baz, %struct.baz* %tmp3, i32 0, i32 1
store double 0.000000e+00, double* %tmp7, align 8, !tbaa !4
%tmp8 = call <2 x double> @zot(%struct.widget* %tmp, %struct.baz* %tmp3)
; Store the vector result to %tmp2 and immediately load it back.
%tmp9 = getelementptr inbounds %struct.snork, %struct.snork* %tmp2, i32 0, i32 0
store <2 x double> %tmp8, <2 x double>* %tmp9, align 16
%tmp10 = getelementptr inbounds %struct.snork, %struct.snork* %tmp2, i32 0, i32 0
%tmp11 = load <2 x double>, <2 x double>* %tmp10, align 16
%tmp12 = call double @wobble(%struct.spam* %tmp1, <2 x double> %tmp11)
ret double %tmp12
}
; Declarations of the intrinsics and the external function referenced above.
; @llvm.lifetime.start is declared but has no call site in this file.
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1
; External function with no body in this module; called once from @wombat.1.
declare void @quux() unnamed_addr align 2
; @wobble: spills the vector argument %arg1 to the local %tmp and the pointer
; %arg to %tmp2, then makes two 16-byte copies:
;   %tmp3 <- the %struct.snork at field 0 of *%arg
;   %tmp4 <- %tmp (i.e. the spilled %arg1)
; The vectors held in %tmp3 and %tmp4 are loaded and passed, in that order,
; to @eggs, whose result is returned.
define linkonce_odr double @wobble(%struct.spam* %arg, <2 x double> %arg1) comdat($_ZN1k1lE1d) align 2 {
bb:
%tmp = alloca %struct.snork, align 16
%tmp2 = alloca %struct.spam*, align 8
%tmp3 = alloca %struct.snork, align 16
%tmp4 = alloca %struct.snork, align 16
%tmp5 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
store <2 x double> %arg1, <2 x double>* %tmp5, align 16
store %struct.spam* %arg, %struct.spam** %tmp2, align 8, !tbaa !0
%tmp6 = load %struct.spam*, %struct.spam** %tmp2, align 8
%tmp7 = getelementptr inbounds %struct.spam, %struct.spam* %tmp6, i32 0, i32 0
%tmp8 = bitcast %struct.snork* %tmp3 to i8*
%tmp9 = bitcast %struct.snork* %tmp7 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp8, i8* %tmp9, i64 16, i1 false), !tbaa.struct !14
%tmp10 = bitcast %struct.snork* %tmp4 to i8*
%tmp11 = bitcast %struct.snork* %tmp to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 16, i1 false), !tbaa.struct !14
%tmp12 = getelementptr inbounds %struct.snork, %struct.snork* %tmp3, i32 0, i32 0
%tmp13 = load <2 x double>, <2 x double>* %tmp12, align 16
%tmp14 = getelementptr inbounds %struct.snork, %struct.snork* %tmp4, i32 0, i32 0
%tmp15 = load <2 x double>, <2 x double>* %tmp14, align 16
%tmp16 = call double @eggs(<2 x double> %tmp13, <2 x double> %tmp15)
ret double %tmp16
}
; @llvm.lifetime.end is declared but has no call site in this file.
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0
; @eggs: stores %arg into the local %tmp and %arg1 into the local %tmp2,
; copies %tmp2 into %tmp3 with a 16-byte memcpy, reloads the vector from
; %tmp3, and returns @wobble.2(%tmp, <reloaded %arg1>). So %arg is passed to
; @wobble.2 by pointer and %arg1 by value.
define linkonce_odr double @eggs(<2 x double> %arg, <2 x double> %arg1) align 2 {
bb:
%tmp = alloca %struct.snork, align 16
%tmp2 = alloca %struct.snork, align 16
%tmp3 = alloca %struct.snork, align 16
%tmp4 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
store <2 x double> %arg, <2 x double>* %tmp4, align 16
%tmp5 = getelementptr inbounds %struct.snork, %struct.snork* %tmp2, i32 0, i32 0
store <2 x double> %arg1, <2 x double>* %tmp5, align 16
%tmp6 = bitcast %struct.snork* %tmp3 to i8*
%tmp7 = bitcast %struct.snork* %tmp2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp6, i8* %tmp7, i64 16, i1 false), !tbaa.struct !14
%tmp8 = getelementptr inbounds %struct.snork, %struct.snork* %tmp3, i32 0, i32 0
%tmp9 = load <2 x double>, <2 x double>* %tmp8, align 16
%tmp10 = call double @wobble.2(%struct.snork* %tmp, <2 x double> %tmp9)
ret double %tmp10
}
; @wobble.2: with the callees as defined in this file (@quux.3 reads lane 1,
; @zot.4 reads lane 0, @wombat packs its two scalars into lanes 0 and 1):
;   1. spills %arg1 into the local %tmp,
;   2. builds %tmp3 = <lane 1 of %arg1, lane 0 of %arg1> via @wombat,
;   3. multiplies %tmp3 element-wise by the vector at *%arg and stores the
;      product back into %tmp3,
;   4. returns (lane 0 of the product) - (lane 1 of the product).
define linkonce_odr double @wobble.2(%struct.snork* %arg, <2 x double> %arg1) comdat($_ZN1d1hES_) align 2 {
bb:
%tmp = alloca %struct.snork, align 16
%tmp2 = alloca %struct.snork*, align 8
%tmp3 = alloca %struct.snork, align 16
%tmp4 = getelementptr inbounds %struct.snork, %struct.snork* %tmp, i32 0, i32 0
store <2 x double> %arg1, <2 x double>* %tmp4, align 16
store %struct.snork* %arg, %struct.snork** %tmp2, align 8, !tbaa !0
%tmp5 = load %struct.snork*, %struct.snork** %tmp2, align 8
; Extract both lanes of the spilled %arg1 and repack them in swapped order.
%tmp6 = call double @quux.3(%struct.snork* %tmp)
%tmp7 = call double @zot.4(%struct.snork* %tmp)
call void @wombat(%struct.snork* %tmp3, double %tmp6, double %tmp7)
; Element-wise product of %tmp3 with the vector at *%arg, written to %tmp3.
%tmp8 = getelementptr inbounds %struct.snork, %struct.snork* %tmp5, i32 0, i32 0
%tmp9 = load <2 x double>, <2 x double>* %tmp8, align 16, !tbaa !12
%tmp10 = getelementptr inbounds %struct.snork, %struct.snork* %tmp3, i32 0, i32 0
%tmp11 = load <2 x double>, <2 x double>* %tmp10, align 16, !tbaa !12
%tmp12 = fmul <2 x double> %tmp11, %tmp9
store <2 x double> %tmp12, <2 x double>* %tmp10, align 16, !tbaa !12
; Lane 0 minus lane 1 of the product.
%tmp13 = call double @zot.4(%struct.snork* %tmp3)
%tmp14 = call double @quux.3(%struct.snork* %tmp3)
%tmp15 = fsub double %tmp13, %tmp14
ret double %tmp15
}
; @quux.3: spills and reloads %arg, loads the <2 x double> at field 0 of
; *%arg, and returns its lane 1.
define linkonce_odr double @quux.3(%struct.snork* %arg) comdat($_ZN1d1fEv) align 2 {
bb:
%tmp = alloca %struct.snork*, align 8
store %struct.snork* %arg, %struct.snork** %tmp, align 8, !tbaa !0
%tmp1 = load %struct.snork*, %struct.snork** %tmp, align 8
%tmp2 = getelementptr inbounds %struct.snork, %struct.snork* %tmp1, i32 0, i32 0
%tmp3 = load <2 x double>, <2 x double>* %tmp2, align 16, !tbaa !12
%tmp4 = extractelement <2 x double> %tmp3, i32 1
ret double %tmp4
}
; @zot.4: spills and reloads %arg, loads the <2 x double> at field 0 of
; *%arg, and returns its lane 0.
define linkonce_odr double @zot.4(%struct.snork* %arg) comdat($_ZN1d1eEv) align 2 {
bb:
%tmp = alloca %struct.snork*, align 8
store %struct.snork* %arg, %struct.snork** %tmp, align 8, !tbaa !0
%tmp1 = load %struct.snork*, %struct.snork** %tmp, align 8
%tmp2 = getelementptr inbounds %struct.snork, %struct.snork* %tmp1, i32 0, i32 0
%tmp3 = load <2 x double>, <2 x double>* %tmp2, align 16, !tbaa !12
%tmp4 = extractelement <2 x double> %tmp3, i32 0
ret double %tmp4
}
; Attribute groups referenced by the intrinsic declarations (#0 by the
; lifetime and memcpy intrinsics, #1 by memset).
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
attributes #1 = { argmemonly nofree nosync nounwind willreturn writeonly }
; Metadata referenced by the !tbaa attachments above. !1, !2 and !6 are the
; "any pointer", "omnipotent char" and "double" type nodes, and !3 is the
; "Simple C++ TBAA" node they chain up to.
!0 = !{!1, !1, i64 0}
!1 = !{!"any pointer", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C++ TBAA"}
!4 = !{!5, !6, i64 8}
!5 = !{!"_ZTS1a", !6, i64 0, !6, i64 8}
!6 = !{!"double", !2, i64 0}
!7 = !{!8, !6, i64 8}
!8 = !{!"_ZTS1p", !5, i64 0}
!9 = !{!5, !6, i64 0}
!10 = !{!8, !6, i64 0}
!11 = !{!6, !6, i64 0}
!12 = !{!2, !2, i64 0}
; !13 and !14 are the operands of the !tbaa.struct attachments on the memcpy
; calls.
!13 = !{i64 0, i64 8, !11, i64 8, i64 8, !11}
!14 = !{i64 0, i64 16, !12}
(Sorry for the length -- this is as far as llvm-reduce would take it)
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D98714/new/
https://reviews.llvm.org/D98714
More information about the llvm-commits mailing list