[llvm] X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX (PR #123917)
YunQiang Su via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 22:11:20 PST 2025
https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/123917
From 358924ceaaa1d4f8bbf387e528686a8b7c7fdf24 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Fri, 24 Jan 2025 13:05:28 +0800
Subject: [PATCH] X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +-
llvm/test/CodeGen/X86/canonicalize-vars.ll | 336 ++++++++++++++++++++-
2 files changed, 335 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a956074e50d86f..0dd086db93ff9f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -334,10 +334,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
}
}
if (Subtarget.hasAVX10_2()) {
@@ -367,7 +367,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
@@ -890,6 +889,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
+ setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll
index 951ea1b72f4390..387d2d8b6ddaf0 100644
--- a/llvm/test/CodeGen/X86/canonicalize-vars.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
; RUN: llc -mtriple=i686-- < %s | FileCheck %s -check-prefixes=X87
-; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2
+; RUN: llc -mattr=+sse2 -mtriple=i686-- < %s | FileCheck %s -check-prefixes=SSE686
+; RUN: llc -mattr=+avx -mtriple=i686-- < %s | FileCheck %s -check-prefixes=AVX686
+; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE
; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1
; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2
; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F
@@ -12,6 +14,30 @@ define float @canon_fp32_varargsf32(float %a) {
; X87-NEXT: fmuls {{[0-9]+}}(%esp)
; X87-NEXT: retl
;
+; SSE686-LABEL: canon_fp32_varargsf32:
+; SSE686: # %bb.0:
+; SSE686-NEXT: pushl %eax
+; SSE686-NEXT: .cfi_def_cfa_offset 8
+; SSE686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE686-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE686-NEXT: movss %xmm0, (%esp)
+; SSE686-NEXT: flds (%esp)
+; SSE686-NEXT: popl %eax
+; SSE686-NEXT: .cfi_def_cfa_offset 4
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canon_fp32_varargsf32:
+; AVX686: # %bb.0:
+; AVX686-NEXT: pushl %eax
+; AVX686-NEXT: .cfi_def_cfa_offset 8
+; AVX686-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX686-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: vmovss %xmm0, (%esp)
+; AVX686-NEXT: flds (%esp)
+; AVX686-NEXT: popl %eax
+; AVX686-NEXT: .cfi_def_cfa_offset 4
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canon_fp32_varargsf32:
; SSE: # %bb.0:
; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -33,6 +59,20 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) {
; X87-NEXT: fmulp %st, %st(1)
; X87-NEXT: retl
;
+; SSE686-LABEL: canon_fp32_varargsf80:
+; SSE686: # %bb.0:
+; SSE686-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE686-NEXT: fld1
+; SSE686-NEXT: fmulp %st, %st(1)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canon_fp32_varargsf80:
+; AVX686: # %bb.0:
+; AVX686-NEXT: fldt {{[0-9]+}}(%esp)
+; AVX686-NEXT: fld1
+; AVX686-NEXT: fmulp %st, %st(1)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canon_fp32_varargsf80:
; SSE: # %bb.0:
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
@@ -64,6 +104,32 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; X87-NEXT: fsubp %st, %st(1)
; X87-NEXT: retl
;
+; SSE686-LABEL: complex_canonicalize_fmul_x86_fp80:
+; SSE686: # %bb.0: # %entry
+; SSE686-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE686-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE686-NEXT: fsub %st(1), %st
+; SSE686-NEXT: fld %st(0)
+; SSE686-NEXT: fadd %st(2), %st
+; SSE686-NEXT: fsubp %st, %st(1)
+; SSE686-NEXT: fld1
+; SSE686-NEXT: fmulp %st, %st(1)
+; SSE686-NEXT: fsubp %st, %st(1)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: complex_canonicalize_fmul_x86_fp80:
+; AVX686: # %bb.0: # %entry
+; AVX686-NEXT: fldt {{[0-9]+}}(%esp)
+; AVX686-NEXT: fldt {{[0-9]+}}(%esp)
+; AVX686-NEXT: fsub %st(1), %st
+; AVX686-NEXT: fld %st(0)
+; AVX686-NEXT: fadd %st(2), %st
+; AVX686-NEXT: fsubp %st, %st(1)
+; AVX686-NEXT: fld1
+; AVX686-NEXT: fmulp %st, %st(1)
+; AVX686-NEXT: fsubp %st, %st(1)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: complex_canonicalize_fmul_x86_fp80:
; SSE: # %bb.0: # %entry
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
@@ -130,6 +196,54 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; X87-NEXT: fmulp %st, %st(1)
; X87-NEXT: retl
;
+; SSE686-LABEL: canonicalize_fp64:
+; SSE686: # %bb.0: # %start
+; SSE686-NEXT: pushl %ebp
+; SSE686-NEXT: .cfi_def_cfa_offset 8
+; SSE686-NEXT: .cfi_offset %ebp, -8
+; SSE686-NEXT: movl %esp, %ebp
+; SSE686-NEXT: .cfi_def_cfa_register %ebp
+; SSE686-NEXT: andl $-8, %esp
+; SSE686-NEXT: subl $8, %esp
+; SSE686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE686-NEXT: movapd %xmm0, %xmm2
+; SSE686-NEXT: cmpunordsd %xmm0, %xmm2
+; SSE686-NEXT: movapd %xmm2, %xmm3
+; SSE686-NEXT: andpd %xmm1, %xmm3
+; SSE686-NEXT: maxsd %xmm0, %xmm1
+; SSE686-NEXT: andnpd %xmm1, %xmm2
+; SSE686-NEXT: orpd %xmm3, %xmm2
+; SSE686-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; SSE686-NEXT: movsd %xmm2, (%esp)
+; SSE686-NEXT: fldl (%esp)
+; SSE686-NEXT: movl %ebp, %esp
+; SSE686-NEXT: popl %ebp
+; SSE686-NEXT: .cfi_def_cfa %esp, 4
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canonicalize_fp64:
+; AVX686: # %bb.0: # %start
+; AVX686-NEXT: pushl %ebp
+; AVX686-NEXT: .cfi_def_cfa_offset 8
+; AVX686-NEXT: .cfi_offset %ebp, -8
+; AVX686-NEXT: movl %esp, %ebp
+; AVX686-NEXT: .cfi_def_cfa_register %ebp
+; AVX686-NEXT: andl $-8, %esp
+; AVX686-NEXT: subl $8, %esp
+; AVX686-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX686-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX686-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX686-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX686-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX686-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: vmovsd %xmm0, (%esp)
+; AVX686-NEXT: fldl (%esp)
+; AVX686-NEXT: movl %ebp, %esp
+; AVX686-NEXT: popl %ebp
+; AVX686-NEXT: .cfi_def_cfa %esp, 4
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canonicalize_fp64:
; SSE: # %bb.0: # %start
; SSE-NEXT: movapd %xmm0, %xmm2
@@ -207,6 +321,42 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; X87-NEXT: fmulp %st, %st(1)
; X87-NEXT: retl
;
+; SSE686-LABEL: canonicalize_fp32:
+; SSE686: # %bb.0: # %start
+; SSE686-NEXT: pushl %eax
+; SSE686-NEXT: .cfi_def_cfa_offset 8
+; SSE686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE686-NEXT: movaps %xmm0, %xmm2
+; SSE686-NEXT: cmpunordss %xmm0, %xmm2
+; SSE686-NEXT: movaps %xmm2, %xmm3
+; SSE686-NEXT: andps %xmm1, %xmm3
+; SSE686-NEXT: maxss %xmm0, %xmm1
+; SSE686-NEXT: andnps %xmm1, %xmm2
+; SSE686-NEXT: orps %xmm3, %xmm2
+; SSE686-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; SSE686-NEXT: movss %xmm2, (%esp)
+; SSE686-NEXT: flds (%esp)
+; SSE686-NEXT: popl %eax
+; SSE686-NEXT: .cfi_def_cfa_offset 4
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canonicalize_fp32:
+; AVX686: # %bb.0: # %start
+; AVX686-NEXT: pushl %eax
+; AVX686-NEXT: .cfi_def_cfa_offset 8
+; AVX686-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX686-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX686-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX686-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX686-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX686-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: vmovss %xmm0, (%esp)
+; AVX686-NEXT: flds (%esp)
+; AVX686-NEXT: popl %eax
+; AVX686-NEXT: .cfi_def_cfa_offset 4
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canonicalize_fp32:
; SSE: # %bb.0: # %start
; SSE-NEXT: movaps %xmm0, %xmm2
@@ -261,6 +411,22 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
; X87-NEXT: fstps (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: v_test_canonicalize_var_f32:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE686-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE686-NEXT: movss %xmm0, (%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: v_test_canonicalize_var_f32:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX686-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: vmovss %xmm0, (%eax)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: v_test_canonicalize_var_f32:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -290,6 +456,24 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
; X87-NEXT: fstpt (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: v_test_canonicalize_x86_fp80:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: fldt (%eax)
+; SSE686-NEXT: fld1
+; SSE686-NEXT: fmulp %st, %st(1)
+; SSE686-NEXT: fstpt (%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: v_test_canonicalize_x86_fp80:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: fldt (%eax)
+; AVX686-NEXT: fld1
+; AVX686-NEXT: fmulp %st, %st(1)
+; AVX686-NEXT: fstpt (%eax)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: v_test_canonicalize_x86_fp80:
; SSE: # %bb.0:
; SSE-NEXT: fldt (%rdi)
@@ -320,6 +504,22 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
; X87-NEXT: fstpl (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: v_test_canonicalize_var_f64:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE686-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE686-NEXT: movsd %xmm0, (%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: v_test_canonicalize_var_f64:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX686-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: vmovsd %xmm0, (%eax)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: v_test_canonicalize_var_f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
@@ -347,6 +547,20 @@ define void @canonicalize_undef(double addrspace(1)* %out) {
; X87-NEXT: movl $0, (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: canonicalize_undef:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000
+; SSE686-NEXT: movl $0, (%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canonicalize_undef:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000
+; AVX686-NEXT: movl $0, (%eax)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canonicalize_undef:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
@@ -384,6 +598,16 @@ define <4 x float> @canon_fp32_varargsv4f32(<4 x float> %a) {
; X87-NEXT: fstps (%eax)
; X87-NEXT: retl $4
;
+; SSE686-LABEL: canon_fp32_varargsv4f32:
+; SSE686: # %bb.0:
+; SSE686-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canon_fp32_varargsv4f32:
+; AVX686: # %bb.0:
+; AVX686-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canon_fp32_varargsv4f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -430,6 +654,18 @@ define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) {
; X87-NEXT: fstpl (%eax)
; X87-NEXT: retl $4
;
+; SSE686-LABEL: canon_fp64_varargsv4f64:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0]
+; SSE686-NEXT: mulpd %xmm2, %xmm0
+; SSE686-NEXT: mulpd %xmm2, %xmm1
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canon_fp64_varargsv4f64:
+; AVX686: # %bb.0:
+; AVX686-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canon_fp64_varargsv4f64:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0]
@@ -468,6 +704,26 @@ define <2 x x86_fp80> @canon_fp80_varargsv2fp80(<2 x x86_fp80> %a) {
; X87-NEXT: fxch %st(1)
; X87-NEXT: retl
;
+; SSE686-LABEL: canon_fp80_varargsv2fp80:
+; SSE686: # %bb.0:
+; SSE686-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE686-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE686-NEXT: fld1
+; SSE686-NEXT: fmul %st, %st(1)
+; SSE686-NEXT: fmulp %st, %st(2)
+; SSE686-NEXT: fxch %st(1)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: canon_fp80_varargsv2fp80:
+; AVX686: # %bb.0:
+; AVX686-NEXT: fldt {{[0-9]+}}(%esp)
+; AVX686-NEXT: fldt {{[0-9]+}}(%esp)
+; AVX686-NEXT: fld1
+; AVX686-NEXT: fmul %st, %st(1)
+; AVX686-NEXT: fmulp %st, %st(2)
+; AVX686-NEXT: fxch %st(1)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: canon_fp80_varargsv2fp80:
; SSE: # %bb.0:
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
@@ -512,6 +768,22 @@ define void @vec_canonicalize_var_v4f32(<4 x float> addrspace(1)* %out) #1 {
; X87-NEXT: fstps (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: vec_canonicalize_var_v4f32:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: movaps (%eax), %xmm0
+; SSE686-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE686-NEXT: movaps %xmm0, (%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: vec_canonicalize_var_v4f32:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: vmovaps (%eax), %xmm0
+; AVX686-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX686-NEXT: vmovaps %xmm0, (%eax)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: vec_canonicalize_var_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm0
@@ -566,6 +838,26 @@ define void @vec_canonicalize_var_v4f64(<4 x double> addrspace(1)* %out) #1 {
; X87-NEXT: fstpl (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: vec_canonicalize_var_v4f64:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0]
+; SSE686-NEXT: movapd 16(%eax), %xmm1
+; SSE686-NEXT: mulpd %xmm0, %xmm1
+; SSE686-NEXT: mulpd (%eax), %xmm0
+; SSE686-NEXT: movapd %xmm0, (%eax)
+; SSE686-NEXT: movapd %xmm1, 16(%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: vec_canonicalize_var_v4f64:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: vmovapd (%eax), %ymm0
+; AVX686-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; AVX686-NEXT: vmovapd %ymm0, (%eax)
+; AVX686-NEXT: vzeroupper
+; AVX686-NEXT: retl
+;
; SSE-LABEL: vec_canonicalize_var_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0]
@@ -626,6 +918,46 @@ define void @vec_canonicalize_x86_fp80(<4 x x86_fp80> addrspace(1)* %out) #1 {
; X87-NEXT: fstpt (%eax)
; X87-NEXT: retl
;
+; SSE686-LABEL: vec_canonicalize_x86_fp80:
+; SSE686: # %bb.0:
+; SSE686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE686-NEXT: fldt 30(%eax)
+; SSE686-NEXT: fldt 20(%eax)
+; SSE686-NEXT: fldt 10(%eax)
+; SSE686-NEXT: fldt (%eax)
+; SSE686-NEXT: fld1
+; SSE686-NEXT: fmul %st, %st(1)
+; SSE686-NEXT: fmul %st, %st(2)
+; SSE686-NEXT: fmul %st, %st(3)
+; SSE686-NEXT: fmulp %st, %st(4)
+; SSE686-NEXT: fxch %st(3)
+; SSE686-NEXT: fstpt 30(%eax)
+; SSE686-NEXT: fxch %st(1)
+; SSE686-NEXT: fstpt 20(%eax)
+; SSE686-NEXT: fstpt 10(%eax)
+; SSE686-NEXT: fstpt (%eax)
+; SSE686-NEXT: retl
+;
+; AVX686-LABEL: vec_canonicalize_x86_fp80:
+; AVX686: # %bb.0:
+; AVX686-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX686-NEXT: fldt 30(%eax)
+; AVX686-NEXT: fldt 20(%eax)
+; AVX686-NEXT: fldt 10(%eax)
+; AVX686-NEXT: fldt (%eax)
+; AVX686-NEXT: fld1
+; AVX686-NEXT: fmul %st, %st(1)
+; AVX686-NEXT: fmul %st, %st(2)
+; AVX686-NEXT: fmul %st, %st(3)
+; AVX686-NEXT: fmulp %st, %st(4)
+; AVX686-NEXT: fxch %st(3)
+; AVX686-NEXT: fstpt 30(%eax)
+; AVX686-NEXT: fxch %st(1)
+; AVX686-NEXT: fstpt 20(%eax)
+; AVX686-NEXT: fstpt 10(%eax)
+; AVX686-NEXT: fstpt (%eax)
+; AVX686-NEXT: retl
+;
; SSE-LABEL: vec_canonicalize_x86_fp80:
; SSE: # %bb.0:
; SSE-NEXT: fldt 30(%rdi)
@@ -668,5 +1000,3 @@ define void @vec_canonicalize_x86_fp80(<4 x x86_fp80> addrspace(1)* %out) #1 {
store <4 x x86_fp80> %canonicalized, <4 x x86_fp80> addrspace(1)* %out
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; SSE2: {{.*}}
More information about the llvm-commits mailing list