[llvm] [SLP]Initial support for (masked)loads + compress and (masked)interleaved (PR #132099)
antoine moynault via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 6 23:20:15 PDT 2025
antmox wrote:
Hi. Probably useless now but here it is:
```
! { dg-do compile }
! { dg-options "-O3 -ffast-math -fdump-tree-reassoc1 --param max-completely-peeled-insns=200" }
subroutine anisonl(w,vo,anisox,s,ii1,jj1,weight)
!
! Accumulate weighted products of anisox, w and vo into the 3x3
! sub-block of s whose first entry is s(ii1,jj1).
!
! Arguments (as declared below):
!   w      - real*8 (3,3); only read in this routine
!   vo     - real*8 (3,3); only read in this routine
!   anisox - real*8 (3,3,3,3); only read in this routine
!   s      - real*8 (60,60); updated in place
!   ii1    - integer; first row index of the updated sub-block of s
!   jj1    - integer; first column index of the updated sub-block of s
!   weight - real*8 scalar; a factor of every term added to s
!
! No bounds checking is done here: rows ii1..ii1+2 and columns
! jj1..jj1+2 of s are written, so both ranges must lie within 1..60.
!
integer ii1,jj1,i1,iii1,j1,jjj1,k1,l1,m1,n1
real*8 w(3,3),vo(3,3),anisox(3,3,3,3),s(60,60),weight
!
! This routine replaces the following lines in e_c3d.f for
! an anisotropic material
!
! i1 selects the row offset inside the sub-block; iii1 is the
! corresponding row index of s.
do i1=1,3
iii1=ii1+i1-1
! j1 selects the column offset; jjj1 is the corresponding column of s.
do j1=1,3
jjj1=jj1+j1-1
! k1 and l1 run over both indices of w and over the second and
! fourth indices of anisox.
do k1=1,3
do l1=1,3
! Term without vo: anisox(i1,k1,j1,l1)*w(k1,l1)*weight.
s(iii1,jjj1)=s(iii1,jjj1)
& +anisox(i1,k1,j1,l1)*w(k1,l1)*weight
do m1=1,3
! Two terms with a single vo factor: one uses vo(j1,m1) with
! anisox(i1,k1,m1,l1), the other vo(i1,m1) with anisox(m1,k1,j1,l1).
s(iii1,jjj1)=s(iii1,jjj1)
& +anisox(i1,k1,m1,l1)*w(k1,l1)
& *vo(j1,m1)*weight
& +anisox(m1,k1,j1,l1)*w(k1,l1)
& *vo(i1,m1)*weight
do n1=1,3
! Term with two vo factors: vo(i1,m1)*vo(j1,n1) with
! anisox(m1,k1,n1,l1).
s(iii1,jjj1)=s(iii1,jjj1)
& +anisox(m1,k1,n1,l1)
& *w(k1,l1)*vo(i1,m1)*vo(j1,n1)*weight
enddo
enddo
enddo
enddo
enddo
enddo
return
end
! There should be 22 multiplications left after un-distributing
! weight, w(k1,l1), vo(i1,m1) and vo(j1,m1) on the innermost two
! unrolled loops.
! { dg-final { scan-tree-dump-times "\[0-9\] \\\* " 22 "reassoc1" } }
```
`stage2.install/bin/flang -fc1 -triple aarch64-unknown-linux-gnu -emit-obj -mrelocation-model pic -pic-level 2 -pic-is-pie -ffast-math -target-cpu neoverse-512tvb -target-feature +outline-atomics -target-feature +v8.4a -target-feature +aes -target-feature +bf16 -target-feature +ccdp -target-feature +ccidx -target-feature +ccpp -target-feature +complxnum -target-feature +crc -target-feature +dotprod -target-feature +fp-armv8 -target-feature +fp16fml -target-feature +fullfp16 -target-feature +i8mm -target-feature +jsconv -target-feature +lse -target-feature +neon -target-feature +pauth -target-feature +perfmon -target-feature +rand -target-feature +ras -target-feature +rcpc -target-feature +rdm -target-feature +sha2 -target-feature +sha3 -target-feature +sm4 -target-feature +spe -target-feature +ssbs -target-feature +sve -mvscale-max=2 -mvscale-min=2 -vectorize-loops -vectorize-slp -fversion-loops-for-stride -mframe-pointer=non-leaf -mllvm -treat-scalable-fixed-error-as-warning=false -O3 -o reassoc_4.o -x f95-cpp-input reassoc_4.f
`
https://github.com/llvm/llvm-project/pull/132099
More information about the llvm-commits
mailing list