[PATCH] D107966: [SLP]Do not emit extract elements for insertelements users, replace with shuffles directly.

Shimin Cui via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 16 06:36:42 PDT 2022


scui added a comment.

Our SPEC build on PowerPC failed due to this patch. The following reproducer (gd.ll) was extracted from the gcc_r build:

  target datalayout = "E-m:a-p:32:32-i64:64-n32"
  target triple = "powerpc-ibm-aix7.2.0.0"
  
  %union.tree_node = type { %struct.tree_optimization_option }
  %struct.tree_optimization_option = type { %struct.tree_common, %struct.cl_optimization }
  %struct.tree_common = type { %struct.tree_base, %union.tree_node*, %union.tree_node* }
  %struct.tree_base = type { i64 }
  %struct.cl_optimization = type { i32 }
  %struct.c_declarator = type { i32, %struct.c_declarator*, i32, %union.anon.1 }
  %union.anon.1 = type { %struct.anon.443 }
  %struct.anon.443 = type { %union.tree_node*, i32, %union.tree_node*, i8 }
  %struct.c_declspecs = type { %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, i32, i32, i8, i32, i16, i8 }
  
  @flag_isoc99 = internal unnamed_addr global i1 false, align 4
  @pedantic = internal global i32 0, align 4
  
  ; Function Attrs: nounwind
  define fastcc %union.tree_node* @grokdeclarator(%struct.c_declarator* noundef readonly %declarator, %struct.c_declspecs* nocapture noundef %declspecs) unnamed_addr #0 {
  entry:
    %type = getelementptr inbounds %struct.c_declspecs, %struct.c_declspecs* %declspecs, i32 0, i32 0
    %thread_p = getelementptr inbounds %struct.c_declspecs, %struct.c_declspecs* %declspecs, i32 0, i32 8
    %p0 = bitcast %struct.c_declarator* %declarator to i64*
    %t0 = load i64, i64* %p0, align 8
    %cmp00 = icmp eq i64 %t0, 0
    br i1 %cmp00, label %if.end10, label %cleanup
  
  if.end10:                                         ; preds = %entry
    %t1 = load %union.tree_node*, %union.tree_node** %type, align 4
    %t2 = getelementptr %union.tree_node, %union.tree_node* %t1, i32 0, i32 0, i32 0, i32 0, i32 0
    %bf.load1 = load i64, i64* %t2, align 8
    %bf.lshr.mask5.i = and i64 %bf.load1, -281474976710656
    %cmp10 = icmp eq i64 %bf.lshr.mask5.i, 4222124650659840
    %extract.t814 = trunc i64 %bf.load1 to i8
    %extract.t817 = trunc i64 %bf.load1 to i32
    %extract819 = lshr i64 %bf.load1, 43
    %extract.t820 = trunc i64 %extract819 to i32
    %extract823 = lshr i64 %bf.load1, 44
    %extract.t824 = trunc i64 %extract823 to i32
    br i1 %cmp10, label %if.then20, label %if.else20
  
  if.then20:                                        ; preds = %if.end10
    %type1.i33 = getelementptr inbounds %union.tree_node, %union.tree_node* %t1, i32 0, i32 0, i32 0, i32 2
    %t3 = load %union.tree_node*, %union.tree_node** %type1.i33, align 4
    %t4 = getelementptr %union.tree_node, %union.tree_node* %t3, i32 0, i32 0, i32 0, i32 0, i32 0
    %bf.load2 = load i64, i64* %t4, align 8
    %extract.t = trunc i64 %bf.load2 to i8
    %extract.t816 = trunc i64 %bf.load2 to i32
    %extract = lshr i64 %bf.load2, 43
    %extract.t818 = trunc i64 %extract to i32
    %extract821 = lshr i64 %bf.load2, 44
    %extract.t822 = trunc i64 %extract821 to i32
    br label %if.else20
  
  if.else20:                                        ; preds = %if.then20, %if.end10
    %bf.load.off0 = phi i8 [ %extract.t, %if.then20 ], [ %extract.t814, %if.end10 ]
    %bf.load.off0815 = phi i32 [ %extract.t816, %if.then20 ], [ %extract.t817, %if.end10 ]
    %bf.load.off43 = phi i32 [ %extract.t818, %if.then20 ], [ %extract.t820, %if.end10 ]
    %bf.load.off44 = phi i32 [ %extract.t822, %if.then20 ], [ %extract.t824, %if.end10 ]
    %type.addr.0.lcssa.i = phi %union.tree_node* [ %t3, %if.then20 ], [ %t1, %if.end10 ]
    %p5 = getelementptr inbounds %union.tree_node, %union.tree_node* %type.addr.0.lcssa.i, i32 0, i32 0, i32 1, i32 0
    %p9 = getelementptr inbounds %struct.c_declspecs, %struct.c_declspecs* %declspecs, i32 0, i32 9
    %bf.load154 = load i16, i16* %thread_p, align 4
    %bf.lshr155 = lshr i16 %bf.load154, 7
    %bf.clear156 = and i16 %bf.lshr155, 1
    %bf.cast157 = zext i16 %bf.clear156 to i32
    %bf.cast162 = and i32 %bf.load.off43, 1
    %add = add nuw nsw i32 %bf.cast162, %bf.cast157
    %bf.load168 = load i32, i32* %p5, align 4
    %bf.lshr169 = lshr i32 %bf.load168, 18
    %t6 = insertelement <2 x i16> poison, i16 %bf.load154, i64 0
    %t7 = shufflevector <2 x i16> %t6, <2 x i16> poison, <2 x i32> zeroinitializer
    %t8 = lshr <2 x i16> %t7, <i16 5, i16 6>
    %t9 = and <2 x i16> %t8, <i16 1, i16 1>
    %t10 = zext <2 x i16> %t9 to <2 x i32>
    %t11 = insertelement <2 x i32> poison, i32 %bf.lshr169, i64 0
    %t12 = insertelement <2 x i32> %t11, i32 %bf.load.off44, i64 1
    %t13 = and <2 x i32> %t12, <i32 1, i32 1>
    %t14 = add nuw nsw <2 x i32> %t13, %t10
    %t15 = load i8, i8* %p9, align 2
    %conv188 = zext i8 %t15 to i32
    %cmp20 = icmp eq i8 %t15, 0
    %conv192 = and i32 %bf.load.off0815, 255
    %cond196 = select i1 %cmp20, i32 %bf.load.off0815, i32 %conv188
    %t16 = load i32, i32* @pedantic, align 4
    %cmp30 = icmp eq i32 %t16, 0
    %.b28 = load i1, i1* @flag_isoc99, align 4
    %t17 = insertelement <2 x i1> poison, i1 %cmp20, i64 0
    %t18 = insertelement <2 x i1> %t17, i1 %cmp30, i64 1
    %t19 = zext <2 x i1> %t18 to <2 x i64>
    %or.cond1969 = select i1 %cmp30, i1 true, i1 %.b28
    br i1 %or.cond1969, label %cleanup, label %if.else30
  
  if.else30:                                        ; preds = %if.else20
    %cmp40 = icmp ugt i32 %add, 1
    br i1 %cmp40, label %if.then40, label %if.end40
  
  if.then40:                                        ; preds = %if.else30
    br label %if.end40
  
  if.end40:                                         ; preds = %if.then40, %if.else30
    %t20 = extractelement <2 x i32> %t14, i64 0
    %cmp50 = icmp ugt i32 %t20, 1
    br i1 %cmp50, label %if.then50, label %if.end50
  
  if.then50:                                        ; preds = %if.end40
    br label %if.end50
  
  if.end50:                                         ; preds = %if.then50, %if.end40
    br label %cleanup
  
  cleanup:                                          ; preds = %if.end50, %if.else20, %entry
    ret %union.tree_node* null
  }
  
  attributes #0 = { nounwind "approx-func-fp-math"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr10" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+mma,+paired-vector-memops,+pcrelative-memops,+power10-vector,+power8-vector,+power9-vector,+prefix-instrs,+vsx,-htm,-privileged,-quadword-atomics,-rop-protect,-spe" }

Here is the crash output with the latest SLPVectorizer.cpp (as of June 16). To reproduce, run:

  opt  -slp-vectorizer gd.ll

opt: llvm/main/llvm-project/llvm/lib/IR/Instructions.cpp:2012: llvm::ShuffleVectorInst::ShuffleVectorInst(llvm::Value *, llvm::Value *, ArrayRef<int>, const llvm::Twine &, llvm::Instruction *): Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed.
PLEASE submit a bug report to xl_beta at ca.ibm.com and include the crash backtrace.
Stack dump:
0.	Program arguments: llvm/main/build/bin/opt -slp-vectorizer gd.ll
 #0 0x0000000012ea16d4 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (llvm/main/build/bin/opt+0x12ea16d4)
 #1 0x0000000012ea1af4 PrintStackTraceSignalHandler(void*) Signals.cpp:0:0
 #2 0x0000000012e9e818 llvm::sys::RunSignalHandlers() (llvm/main/build/bin/opt+0x12e9e818)
 #3 0x0000000012ea1dbc SignalHandler(int) Signals.cpp:0:0
 #4 0x00007d17768b04c8 (linux-vdso64.so.1+0x4c8)
 #5 0x00007d1776130468 __libc_signal_restore_set /build/glibc-tRXAGY/glibc-2.31/signal/../sysdeps/unix/sysv/linux/internal-signals.h:86:3
 #6 0x00007d1776130468 raise /build/glibc-tRXAGY/glibc-2.31/signal/../sysdeps/unix/sysv/linux/raise.c:48:3
 #7 0x00007d1776107cd0 abort /build/glibc-tRXAGY/glibc-2.31/stdlib/abort.c:79:7
 #8 0x00007d177611f5dc __assert_fail_base /build/glibc-tRXAGY/glibc-2.31/assert/assert.c:92:3
 #9 0x00007d177611f680 __assert_fail /build/glibc-tRXAGY/glibc-2.31/assert/assert.c:101:3
#10 0x00000000124870cc llvm::ShuffleVectorInst::ShuffleVectorInst(llvm::Value*, llvm::Value*, llvm::ArrayRef<int>, llvm::Twine const&, llvm::Instruction*) (llvm/main/build/bin/opt+0x124870cc)
#11 0x000000001064b62c llvm::IRBuilderBase::CreateShuffleVector(llvm::Value*, llvm::Value*, llvm::ArrayRef<int>, llvm::Twine const&) (llvm/main/build/bin/opt+0x1064b62c)
#12 0x000000001318a698 llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*, void>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int>>, std::vector<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>, std::allocator<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>>>>&)::$_69::operator()(llvm::Value*, llvm::Value*, llvm::ArrayRef<int>) const SLPVectorizer.cpp:0:0
#13 0x000000001314098c llvm::slpvectorizer::BoUpSLP::vectorizeTree(llvm::MapVector<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>, llvm::DenseMap<llvm::Value*, unsigned int, llvm::DenseMapInfo<llvm::Value*, void>, llvm::detail::DenseMapPair<llvm::Value*, unsigned int>>, std::vector<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>, std::allocator<std::pair<llvm::Value*, llvm::SmallVector<llvm::Instruction*, 2u>>>>>&) (llvm/main/build/bin/opt+0x1314098c)
#14 0x0000000013150de0 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) (llvm/main/build/bin/opt+0x13150de0)
......

Can you please take a look? Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107966/new/

https://reviews.llvm.org/D107966



More information about the llvm-commits mailing list