[llvm] dfc03c4 - [X86] vector-half-conversions.ll - regenerate with AVX512 slow/fast lane shuffles
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 20 04:09:26 PST 2023
Author: Simon Pilgrim
Date: 2023-11-20T12:09:16Z
New Revision: dfc03c45c1e1d80482963ceb5dd3eb48079dd8c0
URL: https://github.com/llvm/llvm-project/commit/dfc03c45c1e1d80482963ceb5dd3eb48079dd8c0
DIFF: https://github.com/llvm/llvm-project/commit/dfc03c45c1e1d80482963ceb5dd3eb48079dd8c0.diff
LOG: [X86] vector-half-conversions.ll - regenerate with AVX512 slow/fast lane shuffles
Adds missing check prefixes
Added:
Modified:
llvm/test/CodeGen/X86/vector-half-conversions.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index ed6f5007b77e328..21533818bac11a9 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -5,9 +5,9 @@
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512-FASTLANE
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512-FASTLANE
;
; Half to Float
@@ -3156,6 +3156,49 @@ define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
; F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; F16C-NEXT: addq $40, %rsp
; F16C-NEXT: retq
+;
+; AVX512F-LABEL: cvt_2f64_to_2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: subq $104, %rsp
+; AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512F-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX512F-NEXT: # xmm0 = mem[1,0]
+; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [16,0,0,0]
+; AVX512F-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
+; AVX512F-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: addq $104, %rsp
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512-FASTLANE-LABEL: cvt_2f64_to_2i16:
+; AVX512-FASTLANE: # %bb.0:
+; AVX512-FASTLANE-NEXT: subq $40, %rsp
+; AVX512-FASTLANE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT
+; AVX512-FASTLANE-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-FASTLANE-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; AVX512-FASTLANE-NEXT: # xmm0 = mem[1,0]
+; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT
+; AVX512-FASTLANE-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
+; AVX512-FASTLANE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX512-FASTLANE-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT
+; AVX512-FASTLANE-NEXT: vpbroadcastw %xmm0, %xmm1
+; AVX512-FASTLANE-NEXT: vmovaps {{.*#+}} xmm0 = [4,0,0,0]
+; AVX512-FASTLANE-NEXT: vpermi2ps (%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
+; AVX512-FASTLANE-NEXT: addq $40, %rsp
+; AVX512-FASTLANE-NEXT: retq
%1 = fptrunc <2 x double> %a0 to <2 x half>
%2 = bitcast <2 x half> %1 to <2 x i16>
ret <2 x i16> %2
More information about the llvm-commits
mailing list