[llvm] [CostModel][X86] Fix fpext conversion cost for 16 elements (PR #76278)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 22 21:34:22 PST 2023


HaohaiWen wrote:

```
 $cat test.ll
define <8 x double> @foo8(<8 x float> %in) {
  %1 = fpext <8 x float> %in to <8 x double>
  ret <8 x double> %1
}

define <16 x double> @foo16(<16 x float> %in) {
  %1 = fpext <16 x float> %in to <16 x double>
  ret <16 x double> %1
}

```
 $llc test.ll -mtriple=x86_64-unknown-unknown -mattr=avx512f -o -
```
        .text
        .file   "test.ll"
        .globl  foo8                            # -- Begin function foo8
        .p2align        4, 0x90
        .type   foo8,@function
foo8:                                   # @foo8
        .cfi_startproc
# %bb.0:
        vcvtps2pd       %ymm0, %zmm0
        retq
.Lfunc_end0:
        .size   foo8, .Lfunc_end0-foo8
        .cfi_endproc
                                        # -- End function
        .globl  foo16                           # -- Begin function foo16
        .p2align        4, 0x90
        .type   foo16,@function
foo16:                                  # @foo16
        .cfi_startproc
# %bb.0:
        vcvtps2pd       %ymm0, %zmm2
        vextractf64x4   $1, %zmm0, %ymm0
        vcvtps2pd       %ymm0, %zmm1
        vmovaps %zmm2, %zmm0
        retq
.Lfunc_end1:
        .size   foo16, .Lfunc_end1-foo16
        .cfi_endproc
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits

```
uiCA reports a throughput (TP) of 5 for this code on SKX: https://bit.ly/3rGcUKF

https://github.com/llvm/llvm-project/pull/76278


More information about the llvm-commits mailing list