[llvm] [SLP]Initial support for non-power-of-2 (but still whole register) number of elements in operands. (PR #106449)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 01:35:33 PDT 2024
lukel97 wrote:
I'm hitting a separate assertion failure when building SPEC CPU 2017 for rva22u64_v at `-O3`:
```
Assertion failed: (VectorizeNonPowerOf2 && "non-power-of-2 number of loads only " "supported with VectorizeNonPowerOf2"), function canVectorizeLoads, file SLPVectorizer.cpp, line 4771.
```
The reduced case below, saved as `reduced.ll`, crashes with `./build.release/bin/opt --passes=slp-vectorizer -disable-output reduced.ll`:
```llvm
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"
; Reduced reproducer: per the accompanying report, running the SLP vectorizer
; over this function hits the canVectorizeLoads assertion.
;
; The function stores eight doubles to %agg.result at byte offsets
; 8, 16, 24, 32, 40, 48, 56 and 64. The stored values are products of
; floating-point constants and three doubles loaded from null, null+8 and
; null+16; several products are stored more than once.
;
; NOTE(review): the three loads sit at consecutive 8-byte offsets (0, 8, 16),
; i.e. a non-power-of-2 count; presumably this is the load group that reaches
; the assertion -- confirm against the vectorizer debug output.
define void @_ZNK6dealii24TensorProductPolynomialsILi3EE17compute_grad_gradEjRKNS_5PointILi3EEE(ptr %agg.result) #0 personality ptr null {
entry:
; First load: double at address null (offset 0).
%0 = load double, ptr null, align 8
; NOTE(review): %mul.1 has no use within this function.
%mul.1 = fmul double %0, 0.000000e+00
%arrayidx.i39.1 = getelementptr i8, ptr %agg.result, i64 8
; Second load: double at null + 8.
%add.ptr.i41.1.1 = getelementptr i8, ptr null, i64 8
%1 = load double, ptr %add.ptr.i41.1.1, align 8
%mul.1.1 = fmul double %1, 0.000000e+00
%mul.2.1 = fmul double 0.000000e+00, %mul.1.1
; Store #1 -> %agg.result + 8 (value derived from %1).
store double %mul.2.1, ptr %arrayidx.i39.1, align 8
%arrayidx.i39.2 = getelementptr i8, ptr %agg.result, i64 16
%mul.1.2 = fmul double %0, 0.000000e+00
%mul.2.2 = fmul double 0.000000e+00, %mul.1.2
; Store #2 -> %agg.result + 16 (value derived from %0).
store double %mul.2.2, ptr %arrayidx.i39.2, align 8
%arrayidx.i37.1 = getelementptr i8, ptr %agg.result, i64 24
; Store #3 -> %agg.result + 24; reuses %mul.2.1, the value already stored at +8.
store double %mul.2.1, ptr %arrayidx.i37.1, align 8
%arrayidx.i39.1.1 = getelementptr i8, ptr %agg.result, i64 32
; Third load: double at null + 16.
%add.ptr.i41.1.1.1 = getelementptr i8, ptr null, i64 16
%2 = load double, ptr %add.ptr.i41.1.1.1, align 8
; Unlike the other load-fed multiplies, this one multiplies by 1.0 rather than 0.0.
%mul.1.1.1 = fmul double %2, 1.000000e+00
%mul.2.1.1 = fmul double 0.000000e+00, %mul.1.1.1
; Store #4 -> %agg.result + 32 (value derived from %2).
store double %mul.2.1.1, ptr %arrayidx.i39.1.1, align 8
%arrayidx.i39.2.1 = getelementptr i8, ptr %agg.result, i64 40
%mul.1.2.1 = fmul double %1, 0.000000e+00
%mul.2.2.1 = fmul double 0.000000e+00, %mul.1.2.1
; Store #5 -> %agg.result + 40 (value derived from %1).
store double %mul.2.2.1, ptr %arrayidx.i39.2.1, align 8
%arrayidx.i37.2 = getelementptr i8, ptr %agg.result, i64 48
; Store #6 -> %agg.result + 48; reuses %mul.2.2, the value already stored at +16.
store double %mul.2.2, ptr %arrayidx.i37.2, align 8
%arrayidx.i39.1.2 = getelementptr i8, ptr %agg.result, i64 56
; Store #7 -> %agg.result + 56; reuses %mul.2.2.1, the value already stored at +40.
store double %mul.2.2.1, ptr %arrayidx.i39.1.2, align 8
%arrayidx.i39.2.2 = getelementptr i8, ptr %agg.result, i64 64
; Both operands here are constants; no load feeds this chain.
%mul.1.2.2 = fmul double 1.000000e+00, 0.000000e+00
%mul.2.2.2 = fmul double 0.000000e+00, %mul.1.2.2
; Store #8 -> %agg.result + 64 (constant-only value).
store double %mul.2.2.2, ptr %arrayidx.i39.2.2, align 8
ret void
}
; uselistorder directives
uselistorder ptr null, { 1, 2, 3, 4, 5, 0 }
attributes #0 = { "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+za64rs,+zba,+zbb,+zbs,+zfhmin,+zic64b,+zicbom,+zicbop,+zicboz,+ziccamoa,+ziccif,+zicclsm,+ziccrse,+zicntr,+zicsr,+zihintpause,+zihpm,+zkt,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-b,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zbc,-zbkb,-zbkc,-zbkx,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfinx,-zhinx,-zhinxmin,-zicond,-zifencei,-zihintntl,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
```
https://github.com/llvm/llvm-project/pull/106449
More information about the llvm-commits
mailing list