[PATCH] D107966: [SLP]Do not emit extract elements for insertelements users, replace with shuffles directly.
Shimin Cui via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 16 06:36:42 PDT 2022
scui added a comment.
Our SPEC build on PowerPC failed due to this patch. The following reproducer (gd.ll) was extracted from the gcc_r build:
; Module header of the reproducer.
; Data layout: "E" = big-endian, "p:32:32" = 32-bit pointers, "n32" = native
; integer width of 32 bits; the triple selects 32-bit PowerPC on AIX 7.2.
target datalayout = "E-m:a-p:32:32-i64:64-n32"
target triple = "powerpc-ibm-aix7.2.0.0"
; Named types used by @grokdeclarator. %union.tree_node ultimately starts with
; a single i64 (%struct.tree_base), which the function loads and bit-slices.
%union.tree_node = type { %struct.tree_optimization_option }
%struct.tree_optimization_option = type { %struct.tree_common, %struct.cl_optimization }
%struct.tree_common = type { %struct.tree_base, %union.tree_node*, %union.tree_node* }
%struct.tree_base = type { i64 }
%struct.cl_optimization = type { i32 }
%struct.c_declarator = type { i32, %struct.c_declarator*, i32, %union.anon.1 }
%union.anon.1 = type { %struct.anon.443 }
%struct.anon.443 = type { %union.tree_node*, i32, %union.tree_node*, i8 }
%struct.c_declspecs = type { %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, i32, i32, i8, i32, i16, i8 }
; Module-local globals read in block if.else20.
@flag_isoc99 = internal unnamed_addr global i1 false, align 4
@pedantic = internal global i32 0, align 4
; Function Attrs: nounwind
; Reduced reproducer function. It always returns null; only the instruction
; mix matters. The body already contains <2 x ...> vector operations built
; from insertelement chains, alongside equivalent scalar operations.
; NOTE(review): presumably the insertelement/extractelement sequences in
; if.else20 / if.end40 are what the SLP vectorizer trips over; the trace does
; not identify the exact instruction, so confirm before reducing further.
define fastcc %union.tree_node* @grokdeclarator(%struct.c_declarator* noundef readonly %declarator, %struct.c_declspecs* nocapture noundef %declspecs) unnamed_addr #0 {
entry:
; Address fields 0 and 8 of %declspecs, then test the first 8 bytes of
; %declarator (reinterpreted as i64) against zero.
%type = getelementptr inbounds %struct.c_declspecs, %struct.c_declspecs* %declspecs, i32 0, i32 0
%thread_p = getelementptr inbounds %struct.c_declspecs, %struct.c_declspecs* %declspecs, i32 0, i32 8
%p0 = bitcast %struct.c_declarator* %declarator to i64*
%t0 = load i64, i64* %p0, align 8
%cmp00 = icmp eq i64 %t0, 0
br i1 %cmp00, label %if.end10, label %cleanup
if.end10: ; preds = %entry
; Load the leading i64 of the tree node pointed to by field 0 of %declspecs.
%t1 = load %union.tree_node*, %union.tree_node** %type, align 4
%t2 = getelementptr %union.tree_node, %union.tree_node* %t1, i32 0, i32 0, i32 0, i32 0, i32 0
%bf.load1 = load i64, i64* %t2, align 8
; Mask keeps the top 16 bits (0xFFFF000000000000); the compare checks that
; they equal 15 (0x000F000000000000).
%bf.lshr.mask5.i = and i64 %bf.load1, -281474976710656
%cmp10 = icmp eq i64 %bf.lshr.mask5.i, 4222124650659840
; Slices of %bf.load1 that feed the phis in if.else20 on the direct edge.
%extract.t814 = trunc i64 %bf.load1 to i8
%extract.t817 = trunc i64 %bf.load1 to i32
%extract819 = lshr i64 %bf.load1, 43
%extract.t820 = trunc i64 %extract819 to i32
%extract823 = lshr i64 %bf.load1, 44
%extract.t824 = trunc i64 %extract823 to i32
br i1 %cmp10, label %if.then20, label %if.else20
if.then20: ; preds = %if.end10
; Follow the pointer at index path (0, 0, 0, 2) of %t1 and take the same
; slices from that node's leading i64.
%type1.i33 = getelementptr inbounds %union.tree_node, %union.tree_node* %t1, i32 0, i32 0, i32 0, i32 2
%t3 = load %union.tree_node*, %union.tree_node** %type1.i33, align 4
%t4 = getelementptr %union.tree_node, %union.tree_node* %t3, i32 0, i32 0, i32 0, i32 0, i32 0
%bf.load2 = load i64, i64* %t4, align 8
%extract.t = trunc i64 %bf.load2 to i8
%extract.t816 = trunc i64 %bf.load2 to i32
%extract = lshr i64 %bf.load2, 43
%extract.t818 = trunc i64 %extract to i32
%extract821 = lshr i64 %bf.load2, 44
%extract.t822 = trunc i64 %extract821 to i32
br label %if.else20
if.else20: ; preds = %if.then20, %if.end10
; Merge the slices from whichever node was selected.
%bf.load.off0 = phi i8 [ %extract.t, %if.then20 ], [ %extract.t814, %if.end10 ]
%bf.load.off0815 = phi i32 [ %extract.t816, %if.then20 ], [ %extract.t817, %if.end10 ]
%bf.load.off43 = phi i32 [ %extract.t818, %if.then20 ], [ %extract.t820, %if.end10 ]
%bf.load.off44 = phi i32 [ %extract.t822, %if.then20 ], [ %extract.t824, %if.end10 ]
%type.addr.0.lcssa.i = phi %union.tree_node* [ %t3, %if.then20 ], [ %t1, %if.end10 ]
%p5 = getelementptr inbounds %union.tree_node, %union.tree_node* %type.addr.0.lcssa.i, i32 0, i32 0, i32 1, i32 0
%p9 = getelementptr inbounds %struct.c_declspecs, %struct.c_declspecs* %declspecs, i32 0, i32 9
; Scalar form: bit 7 of the i16 field plus bit 0 of %bf.load.off43.
%bf.load154 = load i16, i16* %thread_p, align 4
%bf.lshr155 = lshr i16 %bf.load154, 7
%bf.clear156 = and i16 %bf.lshr155, 1
%bf.cast157 = zext i16 %bf.clear156 to i32
%bf.cast162 = and i32 %bf.load.off43, 1
%add = add nuw nsw i32 %bf.cast162, %bf.cast157
%bf.load168 = load i32, i32* %p5, align 4
%bf.lshr169 = lshr i32 %bf.load168, 18
; Vector form: splat the i16 field, take bits 5 and 6, and add them to a
; <2 x i32> built by insertelement from %bf.lshr169 and %bf.load.off44.
%t6 = insertelement <2 x i16> poison, i16 %bf.load154, i64 0
%t7 = shufflevector <2 x i16> %t6, <2 x i16> poison, <2 x i32> zeroinitializer
%t8 = lshr <2 x i16> %t7, <i16 5, i16 6>
%t9 = and <2 x i16> %t8, <i16 1, i16 1>
%t10 = zext <2 x i16> %t9 to <2 x i32>
%t11 = insertelement <2 x i32> poison, i32 %bf.lshr169, i64 0
%t12 = insertelement <2 x i32> %t11, i32 %bf.load.off44, i64 1
%t13 = and <2 x i32> %t12, <i32 1, i32 1>
%t14 = add nuw nsw <2 x i32> %t13, %t10
%t15 = load i8, i8* %p9, align 2
%conv188 = zext i8 %t15 to i32
%cmp20 = icmp eq i8 %t15, 0
%conv192 = and i32 %bf.load.off0815, 255
%cond196 = select i1 %cmp20, i32 %bf.load.off0815, i32 %conv188
%t16 = load i32, i32* @pedantic, align 4
%cmp30 = icmp eq i32 %t16, 0
%.b28 = load i1, i1* @flag_isoc99, align 4
; Second insertelement chain: packs the two i1 compares and widens them;
; %t19 has no further use in this function.
%t17 = insertelement <2 x i1> poison, i1 %cmp20, i64 0
%t18 = insertelement <2 x i1> %t17, i1 %cmp30, i64 1
%t19 = zext <2 x i1> %t18 to <2 x i64>
%or.cond1969 = select i1 %cmp30, i1 true, i1 %.b28
br i1 %or.cond1969, label %cleanup, label %if.else30
if.else30: ; preds = %if.else20
%cmp40 = icmp ugt i32 %add, 1
br i1 %cmp40, label %if.then40, label %if.end40
if.then40: ; preds = %if.else30
br label %if.end40
if.end40: ; preds = %if.then40, %if.else30
; Only lane 0 of the vector sum %t14 is consumed.
%t20 = extractelement <2 x i32> %t14, i64 0
%cmp50 = icmp ugt i32 %t20, 1
br i1 %cmp50, label %if.then50, label %if.end50
if.then50: ; preds = %if.end40
br label %if.end50
if.end50: ; preds = %if.then50, %if.end40
br label %cleanup
cleanup: ; preds = %if.end50, %if.else20, %entry
ret %union.tree_node* null
}
; Attribute group #0, applied to @grokdeclarator: targets the pwr10 CPU with
; the listed PowerPC vector features (altivec, vsx, power10-vector, ...) enabled.
attributes #0 = { nounwind "approx-func-fp-math"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr10" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+mma,+paired-vector-memops,+pcrelative-memops,+power10-vector,+power8-vector,+power9-vector,+prefix-instrs,+vsx,-htm,-privileged,-quadword-atomics,-rop-protect,-spe" }
Here is the crash dump produced with the latest SLPVectorizer.cpp (as of June 16). To reproduce, run:
opt -slp-vectorizer gd.ll
opt: llvm/main/llvm-project/llvm/lib/IR/Instructions.cpp:2012: llvm::ShuffleVectorInst::ShuffleVectorInst(llvm::Value *, llvm::Value *, ArrayRef<int>, const llvm::Twine &, llvm::Instruction *): Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed.
PLEASE submit a bug report to xl_beta at ca.ibm.com and include the crash backtrace.
Stack dump:
0. Program arguments: llvm/main/build/bin/opt -slp-vectorizer gd.ll
#0 0x0000000012ea16d4 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (llvm/main/build/bin/opt+0x12ea16d4)
#1 0x0000000012ea1af4 PrintStackTraceSignalHandler(void*) Signals.cpp:0:0
#2 0x0000000012e9e818 llvm::sys::RunSignalHandlers() (llvm/main/build/bin/opt+0x12e9e818)
#3 0x0000000012ea1dbc SignalHandler(int) Signals.cpp:0:0
#4 0x00007d17768b04c8 (linux-vdso64.so.1+0x4c8)
#5 0x00007d1776130468 __libc_signal_restore_set /build/glibc-tRXAGY/glibc-2.31/signal/../sysdeps/unix/sysv/linux/internal-signals.h:86:3
#6 0x00007d1776130468 raise /build/glibc-tRXAGY/glibc-2.31/signal/../sysdeps/unix/sysv/linux/raise.c:48:3
#7 0x00007d1776107cd0 abort /build/glibc-tRXAGY/glibc-2.31/stdlib/abort.c:79:7
#8 0x00007d177611f5dc __assert_fail_base /build/glibc-tRXAGY/glibc-2.31/assert/assert.c:92:3
#9 0x00007d177611f680 __assert_fail /build/glibc-tRXAGY/glibc-2.31/assert/assert.c:101:3
#10 0x00000000124870cc llvm::ShuffleVectorInst::ShuffleVectorInst(llvm::Value*, llvm::Value*, llvm::ArrayRef<int>, llvm::Twine const&, llvm::Instruction*) (llvm/main/build/bin/opt+0x124870cc)
#11 0x000000001064b62c llvm::IRBuilderBase::CreateShuffleVector(llvm::Value*, llvm::Value*, llvm::ArrayRef<int>, llvm::Twine const&) (llvm/main/build/bin/opt+0x1064b62c)
#12 0x000000001318a698 llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*, void>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int>>, std::vector<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>, std::allocator<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>>>>&)::$_69::operator()(llvm::Value*, llvm::Value*, llvm::ArrayRef<int>) const SLPVectorizer.cpp:0:0
#13 0x000000001314098c llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*, void>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int>>, std::vector<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>, std::allocator<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>>>>&) (llvm/main/build/bin/opt+0x1314098c)
#14 0x0000000013150de0 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) (llvm/main/build/bin/opt+0x13150de0)
......
Can you please take a look? Thanks!
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D107966/new/
https://reviews.llvm.org/D107966
More information about the llvm-commits
mailing list