[llvm] Reland "[LoopVectorizer] Add support for partial reductions" (PR #120721)

Zequan Wu via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 26 16:27:07 PST 2024


ZequanWu wrote:

We found an `opt` crash caused by this commit (https://g-issues.chromium.org/issues/386257100):

reduced.ll:
```
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"

define void @_ZN6tflite12tensor_utils49PortableSparseMatrixBatchVectorMultiplyAccumulateEPKaPKhiiS2_PKfiPfS6_(ptr %matrix, i32 %conv) #0 {
entry:
  br label %for.cond6

for.cond6:                                        ; preds = %for.cond.cleanup14, %entry
  %dotprod.0 = phi i32 [ 0, %entry ], [ %dotprod.1, %for.cond.cleanup14 ]
  %row_ptr.0 = phi ptr [ %matrix, %entry ], [ %row_ptr.1, %for.cond.cleanup14 ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc22, %for.cond.cleanup14 ]
  %cmp7 = icmp slt i32 %i.0, %conv
  br i1 %cmp7, label %for.cond12, label %for.cond.cleanup8

for.cond.cleanup8:                                ; preds = %for.cond6
  %conv27 = sitofp i32 %dotprod.0 to float
  store float %conv27, ptr %matrix, align 4
  ret void

for.cond12:                                       ; preds = %for.body15, %for.cond6
  %dotprod.1 = phi i32 [ %add, %for.body15 ], [ %dotprod.0, %for.cond6 ]
  %row_ptr.1 = phi ptr [ %incdec.ptr16, %for.body15 ], [ %row_ptr.0, %for.cond6 ]
  %vector_block_ptr.0 = phi ptr [ %incdec.ptr18, %for.body15 ], [ null, %for.cond6 ]
  %c.0 = phi i32 [ %inc, %for.body15 ], [ 0, %for.cond6 ]
  %cmp13 = icmp slt i32 %c.0, 16
  br i1 %cmp13, label %for.body15, label %for.cond.cleanup14

for.cond.cleanup14:                               ; preds = %for.cond12
  %inc22 = add i32 %i.0, 1
  br label %for.cond6

for.body15:                                       ; preds = %for.cond12
  %incdec.ptr16 = getelementptr i8, ptr %row_ptr.1, i64 1
  %0 = load i8, ptr %row_ptr.1, align 1
  %conv17 = sext i8 %0 to i32
  %incdec.ptr18 = getelementptr i8, ptr %vector_block_ptr.0, i64 1
  %1 = load i8, ptr %vector_block_ptr.0, align 1
  %conv19 = sext i8 %1 to i32
  %mul20 = mul i32 %conv17, %conv19
  %add = add i32 %dotprod.1, %mul20
  %inc = add i32 %c.0, 1
  br label %for.cond12
}

attributes #0 = { "target-cpu"="apple-m1" }
```
Run `opt -O2 reduced.ll` to repro the crash:
```
opt: /usr/local/google/home/zequanwu/work/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h:2450: llvm::VPPartialReductionRecipe::VPPartialReductionRecipe(unsigned int, VPValue *, VPValue *, Instruction *): Assertion `isa<VPReductionPHIRecipe>(getOperand(1)->getDefiningRecipe()) && "Unexpected operand order for partial reduction recipe"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: opt -O2 reduced.ll
1.      Running pass "function<eager-inv>(float2int,lower-constant-intrinsics,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O2>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" on module "reduced.ll"
2.      Running pass "loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>" on function "_ZN6tflite12tensor_utils49PortableSparseMatrixBatchVectorMultiplyAccumulateEPKaPKhiiS2_PKfiPfS6_"
 #0 0x000055b364a1ed18 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x44f8d18)
 #1 0x000055b364a1c7ce llvm::sys::RunSignalHandlers() (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x44f67ce)
 #2 0x000055b364a1f538 SignalHandler(int) Signals.cpp:0:0
 #3 0x00007f113ae56590 (/lib/x86_64-linux-gnu/libc.so.6+0x3f590)
 #4 0x00007f113aea53ac __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007f113ae564f2 raise ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x00007f113ae3f4ed abort ./stdlib/abort.c:81:7
 #7 0x00007f113ae3f415 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #8 0x00007f113ae4f012 (/lib/x86_64-linux-gnu/libc.so.6+0x38012)
 #9 0x000055b365f5b5ad (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a355ad)
#10 0x000055b365f5b348 llvm::VPRecipeBuilder::tryToCreatePartialReduction(llvm::Instruction*, llvm::ArrayRef<llvm::VPValue*>) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a35348)
#11 0x000055b365f5aa0f llvm::VPRecipeBuilder::tryToCreateWidenRecipe(llvm::Instruction*, llvm::ArrayRef<llvm::VPValue*>, llvm::VFRange&, llvm::VPBasicBlock*) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a34a0f)
#12 0x000055b365f5c774 llvm::LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(llvm::VFRange&) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a36774)
#13 0x000055b365f4ec91 llvm::LoopVectorizationPlanner::buildVPlansWithVPRecipes(llvm::ElementCount, llvm::ElementCount) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a28c91)
#14 0x000055b365f4e752 llvm::LoopVectorizationPlanner::plan(llvm::ElementCount, unsigned int) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a28752)
#15 0x000055b365f65368 llvm::LoopVectorizePass::processLoop(llvm::Loop*) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a3f368)
#16 0x000055b365f6b81b llvm::LoopVectorizePass::runImpl(llvm::Function&) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a4581b)
#17 0x000055b365f6c085 llvm::LoopVectorizePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5a46085)
#18 0x000055b365e1908d llvm::detail::PassModel<llvm::Function, llvm::LoopVectorizePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#19 0x000055b364c2faaa llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x4709aaa)
#20 0x000055b365e1138d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#21 0x000055b364c34397 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x470e397)
#22 0x000055b365e0961d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) PassBuilderPipelines.cpp:0:0
#23 0x000055b364c2e81a llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x470881a)
#24 0x000055b365da8fa4 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x5882fa4)
#25 0x000055b3649e66bf optMain (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x44c06bf)
#26 0x00007f113ae40c8a __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#27 0x00007f113ae40d45 call_init ./csu/../csu/libc-start.c:128:20
#28 0x00007f113ae40d45 __libc_start_main ./csu/../csu/libc-start.c:347:5
#29 0x000055b3649dfee1 _start (/usr/local/google/home/zequanwu/work/llvm-project/out/cmake/bin/opt+0x44b9ee1)
[1]    2715016 IOT instruction  opt -O2 reduced.ll
```

Could you take a look, or revert the commit if a fix will take a while?

https://github.com/llvm/llvm-project/pull/120721


More information about the llvm-commits mailing list