[llvm] [CostModel][X86] Fix fpext conversion cost for 16 elements (PR #76278)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 22 21:34:22 PST 2023
HaohaiWen wrote:
```
$cat test.ll
define <8 x double> @foo8(<8 x float> %in) {
%1 = fpext <8 x float> %in to <8 x double>
ret <8 x double> %1
}
define <16 x double> @foo16(<16 x float> %in) {
%1 = fpext <16 x float> %in to <16 x double>
ret <16 x double> %1
}
```
$llc test.ll -mtriple=x86_64-unknown-unknown -mattr=avx512f -o -
```
.text
.file "test.ll"
.globl foo8 # -- Begin function foo8
.p2align 4, 0x90
.type foo8,@function
foo8: # @foo8
.cfi_startproc
# %bb.0:
vcvtps2pd %ymm0, %zmm0
retq
.Lfunc_end0:
.size foo8, .Lfunc_end0-foo8
.cfi_endproc
# -- End function
.globl foo16 # -- Begin function foo16
.p2align 4, 0x90
.type foo16,@function
foo16: # @foo16
.cfi_startproc
# %bb.0:
vcvtps2pd %ymm0, %zmm2
vextractf64x4 $1, %zmm0, %ymm0
vcvtps2pd %ymm0, %zmm1
vmovaps %zmm2, %zmm0
retq
.Lfunc_end1:
.size foo16, .Lfunc_end1-foo16
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
```
According to uiCA, its throughput (TP) on SKX is 5: https://bit.ly/3rGcUKF
https://github.com/llvm/llvm-project/pull/76278
More information about the llvm-commits
mailing list