[llvm] [X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic (PR #106370)
Pawan Nirpal via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 02:24:18 PDT 2024
https://github.com/pawan-nirpal-031 updated https://github.com/llvm/llvm-project/pull/106370
>From a824dede98e9a979dd432d0a72b01ad730474245 Mon Sep 17 00:00:00 2001
From: Pawan Anil Nirpal <pawan.anil.nirpal at intel.com>
Date: Wed, 28 Aug 2024 13:09:30 +0200
Subject: [PATCH 1/5] [X86][SelectionDAG] - Add support for llvm.canonicalize
intrinsic
Enable support for fcanonicalize intrinsic lowering.
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 +++
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 46 +++
.../CodeGen/X86/canonicalize-constants.ll | 210 +++++++++++++
.../CodeGen/X86/canonicalize-subnormals.ll | 287 ++++++++++++++++++
llvm/test/CodeGen/X86/canonicalize-vars.ll | 193 ++++++++++++
5 files changed, 786 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/canonicalize-constants.ll
create mode 100644 llvm/test/CodeGen/X86/canonicalize-subnormals.ll
create mode 100644 llvm/test/CodeGen/X86/canonicalize-vars.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74e3a898569bea..c1679b1002df5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1275,6 +1275,56 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
}
break;
+ case ISD::FCANONICALIZE: {
+ const Triple &TT = DAG.getTarget().getTargetTriple();
+ if (TT.getArch() == Triple::x86 || TT.getArch() == Triple::x86_64) {
+ SDValue Operand = Node->getOperand(0);
+ SDLoc dl(Node);
+ EVT VT = Operand.getValueType();
+
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Operand)) {
+ const APFloat &C = CFP->getValueAPF();
+ if (C.isDenormal()) {
+ DenormalMode Mode =
+ DAG.getMachineFunction().getDenormalMode(C.getSemantics());
+ assert((Mode != DenormalMode::getPositiveZero()) &&
+ "Positive denormal mode is not valid for X86 target.");
+ if (Mode == DenormalMode::getPreserveSign()) {
+ SDValue SDZero =
+ DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT);
+ ConstantFPSDNode *ZeroConstFP = cast<ConstantFPSDNode>(SDZero);
+ SDValue CanonZeroFPLoad = ExpandConstantFP(ZeroConstFP, true);
+ DAG.ReplaceAllUsesWith(Node, CanonZeroFPLoad.getNode());
+ LLVM_DEBUG(dbgs()
+ << "Legalized Denormal under mode PreserveSign\n");
+ return;
+ } else if (Mode == DenormalMode::getIEEE()) {
+ DAG.ReplaceAllUsesWith(Node, Operand.getNode());
+ LLVM_DEBUG(dbgs() << "Legalized Denormal under mode IEEE\n");
+ return;
+ }
+ } else if (C.isNaN() && C.isSignaling()) {
+ APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+ SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
+ ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
+ SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
+ DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
+ LLVM_DEBUG(dbgs() << "Legalized Signaling NaN to Quiet NaN\n");
+ return;
+ }
+ } else if (Operand.isUndef()) {
+ APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
+ SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
+ ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
+ SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
+ DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
+ LLVM_DEBUG(dbgs() << "Legalized Undef to Quiet NaN\n");
+ return;
+ }
+ break;
+ }
+ break;
+ }
case ISD::FSHL:
case ISD::FSHR:
case ISD::SRL_PARTS:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d0a54ab8993c26..4bb8c9afd23edc 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5271,6 +5271,52 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case ISD::FCANONICALIZE: {
+ SDValue Operand = Node->getOperand(0);
+ EVT VT = Node->getValueType(0);
+
+ // Perform canonicalization for constants. Replace the operand by a load
+ // from constant pool for this constant. At this point special values such
+ // as denormals and sNaNs have already been canonicalized, so there is no
+ // need to deal with those cases.
+ if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Operand)) {
+ const X86TargetLowering *X86Lowering =
+ static_cast<const X86TargetLowering *>(TLI);
+ if (const Constant *CV = X86Lowering->getTargetConstantFromLoad(Load)) {
+ const ConstantFP *CFP = dyn_cast<ConstantFP>(CV);
+ if (CFP) {
+ ReplaceNode(Node, Load);
+ return;
+ }
+ }
+ }
+
+ // Canonicalize normal non-constant/non-undef FP Nodes.
+ SDValue MulNode;
+ SDValue One;
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ One = CurDAG->getConstantFP(1.0f, dl, VT);
+ } else if (VT == MVT::f80) {
+ APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended());
+ One = CurDAG->getConstantFP(Val, dl, VT);
+ } else if (VT == MVT::f16) {
+ APFloat Val(APFloat::IEEEhalf(), "1.0");
+ One = CurDAG->getConstantFP(Val, dl, VT);
+ } else if (VT == MVT::bf16) {
+ APFloat Val(APFloat::BFloat(), "1.0");
+ One = CurDAG->getConstantFP(Val, dl, VT);
+ } else {
+ // Is it better to assert when we encounter an unknown FP type than to
+ // just replace the node with its operand? This might be our last attempt
+ // at legalization.
+ ReplaceNode(Node, Operand.getNode());
+ return;
+ }
+ // TODO : Follow-up with tablegen pattern to generate mul * 1.0.
+ MulNode = CurDAG->getNode(ISD::FMUL, dl, VT, Operand, One);
+ ReplaceNode(Node, MulNode.getNode());
+ return;
+ }
case ISD::BRIND:
case X86ISD::NT_BRIND: {
if (Subtarget->isTargetNaCl())
diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll
new file mode 100644
index 00000000000000..b71c74bcd4472b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/canonicalize-constants.ll
@@ -0,0 +1,210 @@
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s
+
+define float @canon_fp32() {
+ ; CHECK-LABEL: .LCPI0_0:
+ ; CHECK: .long 0x40400000 # float 3
+ ; CHECK-LABEL: canon_fp32
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+ ; CHECK-NEXT: retq
+ %canonicalized = call float @llvm.canonicalize.f32(float 3.0)
+ ret float %canonicalized
+}
+
+define half @canon_fp16() {
+ ; CHECK-LABEL: .LCPI1_0:
+ ; CHECK: .short 0x4200 # half 3
+ ; CHECK-LABEL: canon_fp16
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsh .LCPI1_0(%rip), %xmm0
+ ; CHECK-NEXT: retq
+ %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0
+ ret half %canonicalized
+}
+
+define double @canon_fp64() {
+ ; CHECK-LABEL: .LCPI2_0:
+ ; CHECK: .quad 0x4008000000000000 # double 3
+ ; CHECK-LABEL: canon_fp64
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0
+ ; CHECK-NEXT: retq
+ %canonicalized = call double @llvm.canonicalize.f64(double 3.0)
+ ret double %canonicalized
+}
+
+define x86_fp80 @canon_fp80() {
+ ; CHECK-LABEL: .LCPI3_0:
+ ; CHECK: .long 0x42b40000 # float 90
+ ; CHECK-LABEL: canon_fp80
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: flds .LCPI3_0(%rip)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0
+ ret x86_fp80 %canonicalized
+}
+
+
+define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
+entry:
+ ; CHECK-LABEL: .LCPI4_0:
+ ; CHECK: .long 0x42b40000 # float 90
+ ; CHECK-LABEL: complex_canonicalize_x86_fp80
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: fldt 24(%rsp)
+ ; CHECK-NEXT: flds .LCPI4_0(%rip)
+ ; CHECK-NEXT: fsubp %st, %st(1)
+ ; CHECK-NEXT: retq
+
+ %mul1 = fsub x86_fp80 %a, %b
+ %add = fadd x86_fp80 %mul1, %b
+ %mul2 = fsub x86_fp80 %add, %mul1
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
+ %result = fsub x86_fp80 %canonicalized, %b
+ ret x86_fp80 %result
+}
+
+define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
+start:
+ ; CHECK-LABEL: .LCPI5_0:
+ ; CHECK: .quad 0x4008000000000000 # double 3
+ ; CHECK-LABEL: complex_canonicalize_fp64
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0
+ ; CHECK-NEXT: retq
+
+ %c = fcmp olt double %a, %b
+ %d = fcmp uno double %a, 0.000000e+00
+ %or.cond.i.i = or i1 %d, %c
+ %e = select i1 %or.cond.i.i, double %b, double %a
+ %f = tail call double @llvm.canonicalize.f64(double 3.0) #2
+ ret double %f
+}
+
+define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: test_fold_canonicalize_p0_f32
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: vmovss %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+ %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: .LCPI7_0:
+ ; CHECK: .long 0x80000000 # float -0
+ ; CHECK-LAEBL: test_fold_canonicalize_n0_f32
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovss .LCPI7_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovss %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+ %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+
+define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: .LCPI8_0:
+ ; CHECK: .long 0x42b40000 # float 90
+ ; CHECK-LAEBL: v_test_canonicalize_p90_x86_fp80
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: flds .LCPI8_0(%rip)
+ ; CHECK-NEXT: fstpt (%rdi)
+ ; CHECK-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
+ store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI9_0:
+ ; CHECK: .short 0x4200 # half 3
+ ; CHECK-LABEL: v_test_canonicalize_p3__half:
+ ; CHECK: # %bb.0: # %entry
+ ; CHECK-NEXT: vmovsh .LCPI9_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsh %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+entry:
+ %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200)
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 {
+ ; CHECK-LABEL: .LCPI10_0:
+ ; CHECK: .quad 0x4008000000000000 # double 3
+ ; CHECK-LAEBL: v_test_canonicalize_p3_f64
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI10_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+entry:
+ %canonicalized = call double @llvm.canonicalize.f64(double 3.0)
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI11_0:
+ ; CHECK: .long 0x40400000 # float 3
+ ; CHECK-LABEL: v_test_canonicalize_p3__bfloat:
+ ; CHECK: # %bb.0: # %entry
+ ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+ ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+ ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+entry:
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0)
+ store bfloat %canonicalized, bfloat addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI12_0:
+ ; CHECK: .long 0xc0400000 # float -3
+ ; CHECK-LABEL: v_test_canonicalize_n3__bfloat:
+ ; CHECK: # %bb.0: # %entry
+ ; CHECK-NEXT: vmovss .LCPI12_0(%rip), %xmm0 # xmm0 = [-3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+ ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+ ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+entry:
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0)
+ store bfloat %canonicalized, bfloat addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: .LCPI13_0:
+ ; CHECK: .long 0xc2b40000 # float -90
+ ; CHECK-LAEBL: v_test_canonicalize_n90_x86_fp80
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: flds .LCPI13_0(%rip)
+ ; CHECK-NEXT: fstpt (%rdi)
+ ; CHECK-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000)
+ store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI14_0:
+ ; CHECK: .short 0xc200 # half -3
+ ; CHECK-LABEL: v_test_canonicalize_n3__half:
+ ; CHECK: # %bb.0: # %entry
+ ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsh %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+entry:
+ %canonicalized = call half @llvm.canonicalize.f16(half 0xHC200)
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
new file mode 100644
index 00000000000000..8e7e04c2a67dc8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
@@ -0,0 +1,287 @@
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck %s
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL %s
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL %s
+
+define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI0_0:
+ ; CHECK: .long 0x80000000 # float -0
+ ; CHECK-LABEL: canonicalize_denormal1_f32_pre_sign:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovss %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI1_0:
+ ; CHECK: .quad 0x8000000000000000 # double -0
+ ; CHECK-LABEL: canonicalize_denormal1_f64_pre_sign:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI1_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+
+define void @canonicalize_qnan_f64(double addrspace(1)* %out) {
+ ;cCHECK-LABEL: .LCPI2_0:
+ ;cCHECK: .quad 0x7ff8000000000000 # double NaN
+ ; CHECK-LABEL: canonicalize_qnan_f64:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) {
+ ;cCHECK-LABEL: .LCPI3_0:
+ ;cCHECK: .quad 0xffffffffffffffff # double NaN
+ ; CHECK-LABEL: canonicalize_qnan_value_neg1_f64:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI3_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI4_0:
+ ; CHECK: .quad 0xfffffffffffffffe # double NaN
+ ; CHECK-LABEL: canonicalize_qnan_value_neg2_f64:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI4_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI5_0:
+ ; CHECK: .quad 0x7ff8000000000000 # double NaN
+ ; CHECK-LABEL: canonicalize_snan0_value_f64:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_undef(double addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI6_0:
+ ; CHECK: .quad 0x7ff8000000000000 # double NaN
+ ; CHECK-LABEL: canonicalize_undef:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd .LCPI6_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double undef)
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) {
+ ; IEEE-DENORMAL-LABEL: .LCPI7_0:
+ ; IEEE-DENORMAL: .long 0x807fffff # float -1.17549421E-38
+ ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f32_ieee:
+ ; IEEE-DENORMAL: # %bb.0:
+ ; IEEE-DENORMAL-NEXT: vmovss .LCPI7_0(%rip), %xmm0
+ ; IEEE-DENORMAL-NEXT: vmovss %xmm0, (%rdi)
+ ; IEEE-DENORMAL-NEXT: retq
+
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) {
+ ; IEEE-DENORMAL-LABEL: .LCPI8_0:
+ ; IEEE-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308
+ ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f64_ieee:
+ ; IEEE-DENORMAL: # %bb.0:
+ ; IEEE-DENORMAL-NEXT: vmovsd .LCPI8_0(%rip), %xmm0
+ ; IEEE-DENORMAL-NEXT: vmovsd %xmm0, (%rdi)
+ ; IEEE-DENORMAL-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) {
+ ; DYN-DENORMAL-LABEL: .LCPI9_0:
+ ; DYN-DENORMAL: .long 0x807fffff # float -1.17549421E-38
+ ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f32_dynamic:
+ ; DYN-DENORMAL: # %bb.0:
+ ; DYN-DENORMAL-NEXT: vmovss .LCPI9_0(%rip), %xmm0
+ ; DYN-DENORMAL-NEXT: vmovss %xmm0, (%rdi)
+ ; DYN-DENORMAL-NEXT: retq
+
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) {
+ ; DYN-DENORMAL-LABEL: .LCPI10_0:
+ ; DYN-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308
+ ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f64_dynamic:
+ ; DYN-DENORMAL: # %bb.0:
+ ; DYN-DENORMAL-NEXT: vmovsd .LCPI10_0(%rip), %xmm0
+ ; DYN-DENORMAL-NEXT: vmovsd %xmm0, (%rdi)
+ ; DYN-DENORMAL-NEXT: retq
+
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI11_0:
+ ; CHECK: .long 0x80000000 # float -0
+ ; CHECK-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0
+ ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+ ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
+ store bfloat %canonicalized, bfloat addrspace(1)* %out
+ ret void
+}
+
+
+define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) {
+ ; IEEE-DENORMAL-LABEL: .LCPI12_0:
+ ; IEEE-DENORMAL: .long 0x80000000 # float -0
+ ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_bfloat_ieee:
+ ; IEEE-DENORMAL: # %bb.0:
+ ; IEEE-DENORMAL-NEXT: vmovss .LCPI12_0(%rip), %xmm0
+ ; IEEE-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+ ; IEEE-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi)
+ ; IEEE-DENORMAL-NEXT: retq
+
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
+ store bfloat %canonicalized, bfloat addrspace(1)* %out
+ ret void
+}
+
+
+define void @canonicalize_denormal1_bfloat_dynamic(bfloat addrspace(1)* %out) {
+ ; DYN-DENORMAL-LABEL: .LCPI13_0:
+ ; DYN-DENORMAL: .long 0x80000000 # float -0
+ ; DYN-DENORMAL-LABEL: canonicalize_denormal1_bfloat_dynamic:
+ ; DYN-DENORMAL: # %bb.0:
+ ; DYN-DENORMAL-NEXT: vmovss .LCPI13_0(%rip), %xmm0
+ ; DYN-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+ ; DYN-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi)
+ ; DYN-DENORMAL-NEXT: retq
+
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
+ store bfloat %canonicalized, bfloat addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) {
+ ; CHECK-LABEL: .LCPI14_0:
+ ; CHECK: .short 0x8000 # half -0
+ ; CHECK-LABEL: canonicalize_denormal1_half_pre_sign:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0
+ ; CHECK-NEXT: vmovsh %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+
+ %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
+
+define void @canonicalize_denormal1_half_ieee(half addrspace(1)* %out) {
+ ; IEEE-DENORMAL-LABEL: .LCPI15_0:
+ ; IEEE-DENORMAL: .short 0x8000 # half -0
+ ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_half_ieee:
+ ; IEEE-DENORMAL: # %bb.0:
+ ; IEEE-DENORMAL-NEXT: vmovsh .LCPI15_0(%rip), %xmm0
+ ; IEEE-DENORMAL-NEXT: vmovsh %xmm0, (%rdi)
+ ; IEEE-DENORMAL-NEXT: retq
+
+ %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_half_dynamic(half addrspace(1)* %out) {
+ ; DYN-DENORMAL-LABEL: .LCPI16_0:
+ ; DYN-DENORMAL: .short 0x8000 # half -0
+ ; DYN-DENORMAL-LABEL: canonicalize_denormal1_half_dynamic:
+ ; DYN-DENORMAL: # %bb.0:
+ ; DYN-DENORMAL-NEXT: vmovsh .LCPI16_0(%rip), %xmm0
+ ; DYN-DENORMAL-NEXT: vmovsh %xmm0, (%rdi)
+ ; DYN-DENORMAL-NEXT: retq
+
+ %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_x86_fp80_pre_sign(x86_fp80 addrspace(1)* %out) {
+ ; CHECK-LAEBL: .LCPI17_0:
+ ; CHECK: .long 0x00000000 # float 0
+ ; CHECK-LAEBL: canonicalize_denormal1_x86_fp80_pre_sign
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: flds .LCPI17_0(%rip)
+ ; CHECK-NEXT: fstpt (%rdi)
+ ; CHECK-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
+ store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_x86_fp80_dynamic(x86_fp80 addrspace(1)* %out) {
+ ; DYN-DENORMAL-LAEBL: .LCPI17_0:
+ ; DYN-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951
+ ; DYN-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_dynamic
+ ; DYN-DENORMAL: # %bb.0:
+ ; DYN-DENORMAL-NEXT: fldt .LCPI17_0(%rip)
+ ; DYN-DENORMAL-NEXT: fstpt (%rdi)
+ ; DYN-DENORMAL-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
+ store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+ ret void
+}
+
+define void @canonicalize_denormal1_x86_fp80_ieee(x86_fp80 addrspace(1)* %out) {
+ ; IEEE-DENORMAL-LAEBL: .LCPI17_0:
+ ; IEEE-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951
+ ; IEEE-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_ieee
+ ; IEEE-DENORMAL: # %bb.0:
+ ; IEEE-DENORMAL-NEXT: fldt .LCPI17_0(%rip)
+ ; IEEE-DENORMAL-NEXT: fstpt (%rdi)
+ ; IEEE-DENORMAL-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
+ store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll
new file mode 100644
index 00000000000000..c1b5dd0dddcd2b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s
+
+define half @complex_canonicalize_fmul_half(half %a, half %b) {
+; CHECK-LABEL: complex_canonicalize_fmul_half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2
+; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+
+ %mul1 = fsub half %a, %b
+ %add = fadd half %mul1, %b
+ %mul2 = fsub half %add, %mul1
+ %canonicalized = call half @llvm.canonicalize.f16(half %mul2)
+ %result = fsub half %canonicalized, %b
+ ret half %result
+}
+
+define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
+entry:
+ ; CHECK-LABEL: complex_canonicalize_fmul_x86_fp80
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: fldt 24(%rsp)
+ ; CHECK-NEXT: fldt 8(%rsp)
+ ; CHECK-NEXT: fsub %st(1), %st
+ ; CHECK-NEXT: fld %st(0)
+ ; CHECK-NEXT: fadd %st(2), %st
+ ; CHECK-NEXT: fsubp %st, %st(1)
+ ; CHECK-NEXT: fsubp %st, %st(1)
+ ; CHECK-NEXT: retq
+
+ %mul1 = fsub x86_fp80 %a, %b
+ %add = fadd x86_fp80 %mul1, %b
+ %mul2 = fsub x86_fp80 %add, %mul1
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %mul2)
+ %result = fsub x86_fp80 %canonicalized, %b
+ ret x86_fp80 %result
+}
+
+define bfloat @complex_canonicalize_fmul_bfloat(bfloat %a, bfloat %b) {
+; CHECK-LABEL: complex_canonicalize_fmul_bfloat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovw %xmm0, %eax
+; CHECK-NEXT: vmovw %xmm1, %ecx
+; CHECK-NEXT: shll $16, %ecx
+; CHECK-NEXT: vmovd %ecx, %xmm0
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm1
+; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm1
+; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1
+; CHECK-NEXT: vmovw %xmm1, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm1
+; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm2
+; CHECK-NEXT: vcvtneps2bf16 %xmm2, %xmm2
+; CHECK-NEXT: vmovw %xmm2, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm2
+; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1
+; CHECK-NEXT: vmovw %xmm1, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm1
+; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1
+; CHECK-NEXT: vmovw %xmm1, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm1
+; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+; CHECK-NEXT: vmovw %xmm0, %eax
+; CHECK-NEXT: vmovw %eax, %xmm0
+; CHECK-NEXT: retq
+
+entry:
+
+ %sub1 = fsub bfloat %a, %b
+ %add = fadd bfloat %sub1, %b
+ %sub2 = fsub bfloat %add, %sub1
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %sub2)
+ %result = fsub bfloat %canonicalized, %b
+ ret bfloat %result
+}
+
+define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
+start:
+ ; CHECK-LABEL: canonicalize_fp64:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+ ; CHECK-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+ ; CHECK-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+ ; CHECK-NEXT: vmovapd %xmm2, %xmm0
+ ; CHECK-NEXT: retq
+
+ %c = fcmp olt double %a, %b
+ %d = fcmp uno double %a, 0.000000e+00
+ %or.cond.i.i = or i1 %d, %c
+ %e = select i1 %or.cond.i.i, double %b, double %a
+ %f = tail call double @llvm.canonicalize.f64(double %e) #2
+ ret double %f
+}
+
+define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
+start:
+ ; CHECK-LABEL: canonicalize_fp32:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+ ; CHECK-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+ ; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
+ ; CHECK-NEXT: vmovaps %xmm2, %xmm0
+ ; CHECK-NEXT: retq
+
+ %cc = fcmp olt float %aa, %bb
+ %dd = fcmp uno float %aa, 0.000000e+00
+ %or.cond.i.i.x = or i1 %dd, %cc
+ %ee = select i1 %or.cond.i.i.x, float %bb, float %aa
+ %ff = tail call float @llvm.canonicalize.f32(float %ee) #2
+ ret float %ff
+}
+
+define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: v_test_canonicalize_var_f32
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovss (%rdi), %xmm0
+ ; CHECK-NEXT: vmovss %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+ %val = load float, float addrspace(1)* %out
+ %canonicalized = call float @llvm.canonicalize.f32(float %val)
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: v_test_canonicalize_x86_fp80
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: fldt (%rdi)
+ ; CHECK-NEXT: fstpt (%rdi)
+ ; CHECK-NEXT: retq
+ %val = load x86_fp80, x86_fp80 addrspace(1)* %out
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val)
+ store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize__half(half addrspace(1)* %out) {
+; CHECK-LABEL: v_test_canonicalize__half:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovsh (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vmovsh %xmm0, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %val = load half, half addrspace(1)* %out
+ %canonicalized = call half @llvm.canonicalize.f16(half %val)
+ store half %canonicalized, half addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
+ ; CHECK-LAEBL: v_test_canonicalize_var_f64
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
+ ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
+ ; CHECK-NEXT: retq
+ %val = load double, double addrspace(1)* %out
+ %canonicalized = call double @llvm.canonicalize.f64(double %val)
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
+
+define void @v_test_canonicalize__bfloat(bfloat addrspace(1)* %out) {
+; CHECK-LABEL: v_test_canonicalize__bfloat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
+; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
+; CHECK-NEXT: retq
+
+entry:
+ %val = load bfloat, bfloat addrspace(1)* %out
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val)
+ store bfloat %canonicalized, bfloat addrspace(1)* %out
+ ret void
+}
+
+declare double @llvm.canonicalize.f64(double)
+declare float @llvm.canonicalize.f32(float)
+declare bfloat @llvm.canonicalize.bf16(bfloat)
+declare x86_fp80 @llvm.canonicalize.f80(x86_fp80)
+declare half @llvm.canonicalize.f16(half)
\ No newline at end of file
>From 34d5244817bcd98c50bffea2a551b5b94722d855 Mon Sep 17 00:00:00 2001
From: Pawan Anil Nirpal <pawan.anil.nirpal at intel.com>
Date: Fri, 6 Sep 2024 09:24:44 +0200
Subject: [PATCH 2/5] Move combine operations to DAG combiner over from
legalizer, address comments
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 -
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 46 -
llvm/lib/Target/X86/X86ISelLowering.cpp | 120 +
.../CodeGen/X86/canonicalize-constants.ll | 593 ++++-
.../CodeGen/X86/canonicalize-subnormals.ll | 1929 +++++++++++++++--
llvm/test/CodeGen/X86/canonicalize-vars.ll | 997 ++++++++-
6 files changed, 3270 insertions(+), 465 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c1679b1002df5e..74e3a898569bea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1275,56 +1275,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
}
break;
- case ISD::FCANONICALIZE: {
- const Triple &TT = DAG.getTarget().getTargetTriple();
- if (TT.getArch() == Triple::x86 || TT.getArch() == Triple::x86_64) {
- SDValue Operand = Node->getOperand(0);
- SDLoc dl(Node);
- EVT VT = Operand.getValueType();
-
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Operand)) {
- const APFloat &C = CFP->getValueAPF();
- if (C.isDenormal()) {
- DenormalMode Mode =
- DAG.getMachineFunction().getDenormalMode(C.getSemantics());
- assert((Mode != DenormalMode::getPositiveZero()) &&
- "Positive denormal mode is not valid for X86 target.");
- if (Mode == DenormalMode::getPreserveSign()) {
- SDValue SDZero =
- DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT);
- ConstantFPSDNode *ZeroConstFP = cast<ConstantFPSDNode>(SDZero);
- SDValue CanonZeroFPLoad = ExpandConstantFP(ZeroConstFP, true);
- DAG.ReplaceAllUsesWith(Node, CanonZeroFPLoad.getNode());
- LLVM_DEBUG(dbgs()
- << "Legalized Denormal under mode PreserveSign\n");
- return;
- } else if (Mode == DenormalMode::getIEEE()) {
- DAG.ReplaceAllUsesWith(Node, Operand.getNode());
- LLVM_DEBUG(dbgs() << "Legalized Denormal under mode IEEE\n");
- return;
- }
- } else if (C.isNaN() && C.isSignaling()) {
- APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
- SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
- ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
- SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
- DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
- LLVM_DEBUG(dbgs() << "Legalized Signaling NaN to Quiet NaN\n");
- return;
- }
- } else if (Operand.isUndef()) {
- APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
- SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
- ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
- SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
- DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
- LLVM_DEBUG(dbgs() << "Legalized Undef to Quiet NaN\n");
- return;
- }
- break;
- }
- break;
- }
case ISD::FSHL:
case ISD::FSHR:
case ISD::SRL_PARTS:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4bb8c9afd23edc..d0a54ab8993c26 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5271,52 +5271,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
- case ISD::FCANONICALIZE: {
- SDValue Operand = Node->getOperand(0);
- EVT VT = Node->getValueType(0);
-
- // Perform canonicalization for constants. Replace the operand by a load
- // from constant pool for this constant. At this point subnoraml values like
- // denormals, snans have been canonicalized so no need to deal with those
- // cases.
- if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Operand)) {
- const X86TargetLowering *X86Lowering =
- static_cast<const X86TargetLowering *>(TLI);
- if (const Constant *CV = X86Lowering->getTargetConstantFromLoad(Load)) {
- const ConstantFP *CFP = dyn_cast<ConstantFP>(CV);
- if (CFP) {
- ReplaceNode(Node, Load);
- return;
- }
- }
- }
-
- // Canonicalize normal non-constant/non-undef FP Nodes.
- SDValue MulNode;
- SDValue One;
- if (VT == MVT::f32 || VT == MVT::f64) {
- One = CurDAG->getConstantFP(1.0f, dl, VT);
- } else if (VT == MVT::f80) {
- APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended());
- One = CurDAG->getConstantFP(Val, dl, VT);
- } else if (VT == MVT::f16) {
- APFloat Val(APFloat::IEEEhalf(), "1.0");
- One = CurDAG->getConstantFP(Val, dl, VT);
- } else if (VT == MVT::bf16) {
- APFloat Val(APFloat::BFloat(), "1.0");
- One = CurDAG->getConstantFP(Val, dl, VT);
- } else {
- // Is it better to assert? when we encounter an unknown FP type,Than to
- // just replace with the operand! As this might be our last attempt at
- // legalization.
- ReplaceNode(Node, Operand.getNode());
- return;
- }
- // TODO : Follow-up with tablegen pattern to generate mul * 1.0.
- MulNode = CurDAG->getNode(ISD::FMUL, dl, VT, Operand, One);
- ReplaceNode(Node, MulNode.getNode());
- return;
- }
case ISD::BRIND:
case X86ISD::NT_BRIND: {
if (Subtarget->isTargetNaCl())
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1a6be4eb5af1ef..4fc7c70764f565 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2533,6 +2533,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::STRICT_FMA,
ISD::FMINNUM,
ISD::FMAXNUM,
+ ISD::FCANONICALIZE,
ISD::SUB,
ISD::LOAD,
ISD::LRINT,
@@ -57976,6 +57977,124 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Folds FCANONICALIZE of a constant FP operand. A denormal constant becomes a
+// zero of the same sign under the preserve-sign denormal mode, a signaling
+// NaN becomes the quiet NaN built by APFloat::getQNaN(), and every other case
+// (including a non-constant operand) yields the operand unchanged.
+SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) {
+  SDValue Src = Node->getOperand(0);
+  SDLoc dl(Node);
+  EVT VT = Src.getValueType();
+  auto *ConstFP = dyn_cast<ConstantFPSDNode>(Src);
+  if (!ConstFP)
+    return Src;
+  const APFloat &Val = ConstFP->getValueAPF();
+  if (Val.isDenormal()) {
+    DenormalMode Mode =
+        DAG.getMachineFunction().getDenormalMode(Val.getSemantics());
+    assert((Mode != DenormalMode::getPositiveZero()) &&
+           "Positive denormal mode is not valid for X86 target.");
+    // Every mode except preserve-sign (e.g. IEEE) keeps the denormal as is.
+    if (Mode != DenormalMode::getPreserveSign())
+      return Src;
+    return DAG.getConstantFP((Val.isNegative() ? -0.0 : 0.0), dl, VT);
+  }
+  if (Val.isNaN() && Val.isSignaling())
+    return DAG.getConstantFP(APFloat::getQNaN(Val.getSemantics()), dl, VT);
+  return Src;
+}
+
+// Returns the first chain (MVT::Other) operand of N that is produced by a
+// strict FP node, or the DAG entry node when N has no such operand.
+SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) {
+  assert(N!=nullptr && "Trying to find last chain for a NULL Node");
+  for (SDValue Op : N->op_values())
+    if (Op.getValueType() == MVT::Other && Op.getNode()->isStrictFPOpcode())
+      return Op;
+  return DAG.getEntryNode();
+}
+
+// True if N uses its FP operand without canonicalizing it. N is a DAG node,
+// so the return case is X86ISD::RET_GLUE (X86::RET is a MachineInstr opcode).
+bool isNonCanonicalizingOperation(SDNode *N) {
+  assert(N!=nullptr && "Trying to check canonical opcode for a NULL Node");
+  switch (N->getOpcode()) {
+  // NOTE(review): confirm this list is exhaustive; extend it if not.
+  case X86ISD::RET_GLUE:
+  case ISD::STORE:
+  case ISD::SETCC:
+  case X86ISD::FCMP:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Reports whether at least one user of N is classified as non-canonicalizing
+// by isNonCanonicalizingOperation(). Returns false when N has no users.
+bool isUsedByNonCanonicalizingOp(SDNode *N) {
+  for (SDNode *User : N->uses())
+    if (isNonCanonicalizingOperation(User))
+      return true;
+
+  return false;
+}
+
+// DAG combine for ISD::FCANONICALIZE (scalar FP only).
+//
+// The node is replaced as follows:
+//  * constant operand: folded by combineConstantCanonicalize();
+//  * undef operand: the quiet NaN of the operand type;
+//  * variable f16/f32/f64/f80 operand with at least one non-canonicalizing
+//    user (see isUsedByNonCanonicalizingOp): STRICT_FMUL(1.0, Operand), i.e.
+//    an explicit multiplication by 1.0;
+//  * everything else: the operand itself, unchanged.
+//
+// TODO : Handle vectors.
+SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) {
+  SDValue Operand = Node->getOperand(0);
+  EVT VT = Operand.getValueType();
+  SDLoc dl(Node);
+
+  // Constants are folded directly, taking the denormal mode into account.
+  if (isa<ConstantFPSDNode>(Operand))
+    return combineConstantCanonicalize(Node, DAG);
+
+  // canonicalize(undef) folds to a quiet NaN constant.
+  if (Operand.isUndef()) {
+    APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
+    return DAG.getConstantFP(CanonicalQNaN, dl, VT);
+  }
+
+  // Store, return, and compare are non-canonicalizing operations. Only when
+  // one of them uses the result must mul * 1.0 be generated; otherwise the
+  // operand is forwarded. Testing this first avoids building a 1.0 constant
+  // that would end up unused.
+  // TODO: For now Preventing bf16 from generating strict_fmul as it
+  // leads to a crash SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
+  // ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
+  // promote this operator's result!
+  if (VT == MVT::bf16 || !isUsedByNonCanonicalizingOp(Node))
+    return Operand;
+
+  // Only these scalar types are handled; any other type (vectors included)
+  // is forwarded as is.
+  // NOTE(review): asserting on an unknown FP type may be preferable, as this
+  // might be the last attempt at legalization.
+  if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::f80)
+    return Operand;
+
+  // getConstantFP(double) converts 1.0 to the FP semantics of VT, so a single
+  // call covers every type accepted above.
+  SDValue One = DAG.getConstantFP(1.0, dl, VT);
+
+  // The strict node needs a chain operand; findLastStrictOpChain() picks it.
+  // TODO : Follow-up with tablegen pattern to generate mul * 1.0.
+  SDValue Chain = findLastStrictOpChain(Node, DAG);
+  return DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
+                     {Chain, One, Operand});
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -58015,6 +58134,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
+ case ISD::FCANONICALIZE: return combineCanonicalize(N,DAG);
case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget);
case ISD::AVGCEILS:
case ISD::AVGCEILU:
diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll
index b71c74bcd4472b..b1a9733806d40e 100644
--- a/llvm/test/CodeGen/X86/canonicalize-constants.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-constants.ll
@@ -1,62 +1,185 @@
-; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
+; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE
+; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW
define float @canon_fp32() {
- ; CHECK-LABEL: .LCPI0_0:
- ; CHECK: .long 0x40400000 # float 3
- ; CHECK-LABEL: canon_fp32
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
- ; CHECK-NEXT: retq
+; SSE-LABEL: canon_fp32:
+; SSE: # %bb.0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float 3.0)
ret float %canonicalized
}
define half @canon_fp16() {
- ; CHECK-LABEL: .LCPI1_0:
- ; CHECK: .short 0x4200 # half 3
- ; CHECK-LABEL: canon_fp16
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsh .LCPI1_0(%rip), %xmm0
- ; CHECK-NEXT: retq
+; SSE-LABEL: canon_fp16:
+; SSE: # %bb.0:
+; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: retq
%canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0
ret half %canonicalized
}
define double @canon_fp64() {
- ; CHECK-LABEL: .LCPI2_0:
- ; CHECK: .quad 0x4008000000000000 # double 3
- ; CHECK-LABEL: canon_fp64
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0
- ; CHECK-NEXT: retq
+; SSE-LABEL: canon_fp64:
+; SSE: # %bb.0:
+; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double 3.0)
ret double %canonicalized
}
define x86_fp80 @canon_fp80() {
- ; CHECK-LABEL: .LCPI3_0:
- ; CHECK: .long 0x42b40000 # float 90
- ; CHECK-LABEL: canon_fp80
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: flds .LCPI3_0(%rip)
- ; CHECK-NEXT: retq
-
+; SSE-LABEL: canon_fp80:
+; SSE: # %bb.0:
+; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp80:
+; AVX: # %bb.0:
+; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp80:
+; AVX2: # %bb.0:
+; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp80:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp80:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0
ret x86_fp80 %canonicalized
}
define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
+; SSE-LABEL: complex_canonicalize_x86_fp80:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: complex_canonicalize_x86_fp80:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: complex_canonicalize_x86_fp80:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: complex_canonicalize_x86_fp80:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: complex_canonicalize_x86_fp80:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: complex_canonicalize_x86_fp80:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512BW-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512BW-NEXT: retq
entry:
- ; CHECK-LABEL: .LCPI4_0:
- ; CHECK: .long 0x42b40000 # float 90
- ; CHECK-LABEL: complex_canonicalize_x86_fp80
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: fldt 24(%rsp)
- ; CHECK-NEXT: flds .LCPI4_0(%rip)
- ; CHECK-NEXT: fsubp %st, %st(1)
- ; CHECK-NEXT: retq
-
%mul1 = fsub x86_fp80 %a, %b
%add = fadd x86_fp80 %mul1, %b
%mul2 = fsub x86_fp80 %add, %mul1
@@ -66,14 +189,36 @@ entry:
}
define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
+; SSE-LABEL: complex_canonicalize_fp64:
+; SSE: # %bb.0: # %start
+; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: complex_canonicalize_fp64:
+; SSE2: # %bb.0: # %start
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: complex_canonicalize_fp64:
+; AVX: # %bb.0: # %start
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: complex_canonicalize_fp64:
+; AVX2: # %bb.0: # %start
+; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: complex_canonicalize_fp64:
+; AVX512F: # %bb.0: # %start
+; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: complex_canonicalize_fp64:
+; AVX512BW: # %bb.0: # %start
+; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; AVX512BW-NEXT: retq
start:
- ; CHECK-LABEL: .LCPI5_0:
- ; CHECK: .quad 0x4008000000000000 # double 3
- ; CHECK-LABEL: complex_canonicalize_fp64
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0
- ; CHECK-NEXT: retq
-
%c = fcmp olt double %a, %b
%d = fcmp uno double %a, 0.000000e+00
%or.cond.i.i = or i1 %d, %c
@@ -83,24 +228,70 @@ start:
}
define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: test_fold_canonicalize_p0_f32
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
- ; CHECK-NEXT: vmovss %xmm0, (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: test_fold_canonicalize_p0_f32:
+; SSE: # %bb.0:
+; SSE-NEXT: movl $0, (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: test_fold_canonicalize_p0_f32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movl $0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fold_canonicalize_p0_f32:
+; AVX: # %bb.0:
+; AVX-NEXT: movl $0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: test_fold_canonicalize_p0_f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl $0, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_fold_canonicalize_p0_f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movl $0, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_fold_canonicalize_p0_f32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: movl $0, (%rdi)
+; AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float 0.0)
store float %canonicalized, float addrspace(1)* %out
ret void
}
define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: .LCPI7_0:
- ; CHECK: .long 0x80000000 # float -0
- ; CHECK-LAEBL: test_fold_canonicalize_n0_f32
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovss .LCPI7_0(%rip), %xmm0
- ; CHECK-NEXT: vmovss %xmm0, (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: test_fold_canonicalize_n0_f32:
+; SSE: # %bb.0:
+; SSE-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: test_fold_canonicalize_n0_f32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_fold_canonicalize_n0_f32:
+; AVX: # %bb.0:
+; AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: test_fold_canonicalize_n0_f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_fold_canonicalize_n0_f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_fold_canonicalize_n0_f32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float -0.0)
store float %canonicalized, float addrspace(1)* %out
ret void
@@ -108,27 +299,84 @@ define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: .LCPI8_0:
- ; CHECK: .long 0x42b40000 # float 90
- ; CHECK-LAEBL: v_test_canonicalize_p90_x86_fp80
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: flds .LCPI8_0(%rip)
- ; CHECK-NEXT: fstpt (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize_p90_x86_fp80:
+; SSE: # %bb.0:
+; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE-NEXT: fstpt (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_p90_x86_fp80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE2-NEXT: fstpt (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_p90_x86_fp80:
+; AVX: # %bb.0:
+; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX-NEXT: fstpt (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_p90_x86_fp80:
+; AVX2: # %bb.0:
+; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX2-NEXT: fstpt (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_p90_x86_fp80:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512F-NEXT: fstpt (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_p90_x86_fp80:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512BW-NEXT: fstpt (%rdi)
+; AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
ret void
}
define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI9_0:
- ; CHECK: .short 0x4200 # half 3
- ; CHECK-LABEL: v_test_canonicalize_p3__half:
- ; CHECK: # %bb.0: # %entry
- ; CHECK-NEXT: vmovsh .LCPI9_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsh %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; SSE-LABEL: v_test_canonicalize_p3__half:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: movw %ax, (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_p3__half:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: movw %ax, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_p3__half:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_p3__half:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_p3__half:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_p3__half:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX512BW-NEXT: retq
entry:
%canonicalized = call half @llvm.canonicalize.f16(half 0xH4200)
store half %canonicalized, half addrspace(1)* %out
@@ -136,13 +384,41 @@ entry:
}
define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 {
- ; CHECK-LABEL: .LCPI10_0:
- ; CHECK: .quad 0x4008000000000000 # double 3
- ; CHECK-LAEBL: v_test_canonicalize_p3_f64
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI10_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize_p3_f64:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; SSE-NEXT: movq %rax, (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_p3_f64:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; SSE2-NEXT: movq %rax, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_p3_f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; AVX-NEXT: movq %rax, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_p3_f64:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; AVX2-NEXT: movq %rax, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_p3_f64:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; AVX512F-NEXT: movq %rax, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_p3_f64:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; AVX512BW-NEXT: movq %rax, (%rdi)
+; AVX512BW-NEXT: retq
entry:
%canonicalized = call double @llvm.canonicalize.f64(double 3.0)
store double %canonicalized, double addrspace(1)* %out
@@ -150,15 +426,35 @@ entry:
}
define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI11_0:
- ; CHECK: .long 0x40400000 # float 3
- ; CHECK-LABEL: v_test_canonicalize_p3__bfloat:
- ; CHECK: # %bb.0: # %entry
- ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
- ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
- ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; SSE-LABEL: v_test_canonicalize_p3__bfloat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movw $16448, (%rdi) # imm = 0x4040
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_p3__bfloat:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movw $16448, (%rdi) # imm = 0x4040
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_p3__bfloat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movw $16448, (%rdi) # imm = 0x4040
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_p3__bfloat:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: movw $16448, (%rdi) # imm = 0x4040
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_p3__bfloat:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: movw $16448, (%rdi) # imm = 0x4040
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_p3__bfloat:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: movw $16448, (%rdi) # imm = 0x4040
+; AVX512BW-NEXT: retq
entry:
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0)
store bfloat %canonicalized, bfloat addrspace(1)* %out
@@ -166,15 +462,35 @@ entry:
}
define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI12_0:
- ; CHECK: .long 0xc0400000 # float -3
- ; CHECK-LABEL: v_test_canonicalize_n3__bfloat:
- ; CHECK: # %bb.0: # %entry
- ; CHECK-NEXT: vmovss .LCPI12_0(%rip), %xmm0 # xmm0 = [-3.0E+0,0.0E+0,0.0E+0,0.0E+0]
- ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
- ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; SSE-LABEL: v_test_canonicalize_n3__bfloat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movw $-16320, (%rdi) # imm = 0xC040
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_n3__bfloat:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movw $-16320, (%rdi) # imm = 0xC040
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_n3__bfloat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movw $-16320, (%rdi) # imm = 0xC040
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_n3__bfloat:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: movw $-16320, (%rdi) # imm = 0xC040
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_n3__bfloat:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: movw $-16320, (%rdi) # imm = 0xC040
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_n3__bfloat:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: movw $-16320, (%rdi) # imm = 0xC040
+; AVX512BW-NEXT: retq
entry:
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0)
store bfloat %canonicalized, bfloat addrspace(1)* %out
@@ -182,29 +498,86 @@ entry:
}
define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: .LCPI13_0:
- ; CHECK: .long 0xc2b40000 # float -90
- ; CHECK-LAEBL: v_test_canonicalize_n90_x86_fp80
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: flds .LCPI13_0(%rip)
- ; CHECK-NEXT: fstpt (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize_n90_x86_fp80:
+; SSE: # %bb.0:
+; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE-NEXT: fstpt (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_n90_x86_fp80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SSE2-NEXT: fstpt (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_n90_x86_fp80:
+; AVX: # %bb.0:
+; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX-NEXT: fstpt (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_n90_x86_fp80:
+; AVX2: # %bb.0:
+; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX2-NEXT: fstpt (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_n90_x86_fp80:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512F-NEXT: fstpt (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_n90_x86_fp80:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512BW-NEXT: fstpt (%rdi)
+; AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
ret void
}
define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI14_0:
- ; CHECK: .short 0xc200 # half -3
- ; CHECK-LABEL: v_test_canonicalize_n3__half:
- ; CHECK: # %bb.0: # %entry
- ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsh %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; SSE-LABEL: v_test_canonicalize_n3__half:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: movw %ax, (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_n3__half:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: movw %ax, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_n3__half:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_n3__half:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_n3__half:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_n3__half:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX512BW-NEXT: retq
entry:
%canonicalized = call half @llvm.canonicalize.f16(half 0xHC200)
store half %canonicalized, half addrspace(1)* %out
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
index 8e7e04c2a67dc8..034da96271eb85 100644
--- a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
@@ -1,30 +1,269 @@
-; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck %s
-; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL %s
-; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
+; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-SSE2 %s
+; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-SSE2 %s
+; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-SSE2 %s
+; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX %s
+; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX %s
+; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX %s
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX2 %s
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX2 %s
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX2 %s
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512F %s
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512F %s
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512F %s
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512BW %s
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512BW %s
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512BW %s
-define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI0_0:
- ; CHECK: .long 0x80000000 # float -0
- ; CHECK-LABEL: canonicalize_denormal1_f32_pre_sign:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0
- ; CHECK-NEXT: vmovss %xmm0, (%rdi)
- ; CHECK-NEXT: retq
+define double @test_bad_subnormal() {
+; PRE-SIGN-SSE2-LABEL: test_bad_subnormal:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: test_bad_subnormal:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: test_bad_subnormal:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: test_bad_subnormal:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: test_bad_subnormal:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: test_bad_subnormal:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: test_bad_subnormal:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: test_bad_subnormal:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: test_bad_subnormal:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: test_bad_subnormal:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: test_bad_subnormal:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: test_bad_subnormal:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: test_bad_subnormal:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: test_bad_subnormal:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: test_bad_subnormal:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; DYN-DENORMAL-AVX512BW-NEXT: retq
+ %canon = call double @llvm.canonicalize(double 0x7ff8000000000001) ; quiet NaN with payload 1
+ ret double %canon
+}
+define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) {
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_pre_sign:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
store float %canonicalized, float addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI1_0:
- ; CHECK: .quad 0x8000000000000000 # double -0
- ; CHECK-LABEL: canonicalize_denormal1_f64_pre_sign:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI1_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_pre_sign:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
@@ -32,141 +271,875 @@ define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) {
define void @canonicalize_qnan_f64(double addrspace(1)* %out) {
- ;cCHECK-LABEL: .LCPI2_0:
- ;cCHECK: .quad 0x7ff8000000000000 # double NaN
- ; CHECK-LABEL: canonicalize_qnan_f64:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_f64:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_qnan_f64:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_f64:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_f64:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_f64:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_f64:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_f64:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) {
- ;cCHECK-LABEL: .LCPI3_0:
- ;cCHECK: .quad 0xffffffffffffffff # double NaN
- ; CHECK-LABEL: canonicalize_qnan_value_neg1_f64:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI3_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg1_f64:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movq $-1, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movq $-1, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movq $-1, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg1_f64:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movq $-1, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movq $-1, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movq $-1, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg1_f64:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movq $-1, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movq $-1, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movq $-1, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movq $-1, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movq $-1, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI4_0:
- ; CHECK: .quad 0xfffffffffffffffe # double NaN
- ; CHECK-LABEL: canonicalize_qnan_value_neg2_f64:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI4_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg2_f64:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movq $-2, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movq $-2, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movq $-2, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg2_f64:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movq $-2, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movq $-2, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movq $-2, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg2_f64:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movq $-2, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movq $-2, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movq $-2, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movq $-2, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movq $-2, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI5_0:
- ; CHECK: .quad 0x7ff8000000000000 # double NaN
- ; CHECK-LABEL: canonicalize_snan0_value_f64:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_snan0_value_f64:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_snan0_value_f64:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_snan0_value_f64:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_snan0_value_f64:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_snan0_value_f64:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_undef(double addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI6_0:
- ; CHECK: .quad 0x7ff8000000000000 # double NaN
- ; CHECK-LABEL: canonicalize_undef:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd .LCPI6_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_undef:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_undef:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_undef:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_undef:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_undef:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_undef:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_undef:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_undef:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_undef:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_undef:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_undef:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_undef:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_undef:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_undef:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_undef:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double undef)
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) {
- ; IEEE-DENORMAL-LABEL: .LCPI7_0:
- ; IEEE-DENORMAL: .long 0x807fffff # float -1.17549421E-38
- ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f32_ieee:
- ; IEEE-DENORMAL: # %bb.0:
- ; IEEE-DENORMAL-NEXT: vmovss .LCPI7_0(%rip), %xmm0
- ; IEEE-DENORMAL-NEXT: vmovss %xmm0, (%rdi)
- ; IEEE-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_ieee:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_ieee:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_ieee:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_ieee:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
store float %canonicalized, float addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) {
- ; IEEE-DENORMAL-LABEL: .LCPI8_0:
- ; IEEE-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308
- ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f64_ieee:
- ; IEEE-DENORMAL: # %bb.0:
- ; IEEE-DENORMAL-NEXT: vmovsd .LCPI8_0(%rip), %xmm0
- ; IEEE-DENORMAL-NEXT: vmovsd %xmm0, (%rdi)
- ; IEEE-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_ieee:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_ieee:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_ieee:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_ieee:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) {
- ; DYN-DENORMAL-LABEL: .LCPI9_0:
- ; DYN-DENORMAL: .long 0x807fffff # float -1.17549421E-38
- ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f32_dynamic:
- ; DYN-DENORMAL: # %bb.0:
- ; DYN-DENORMAL-NEXT: vmovss .LCPI9_0(%rip), %xmm0
- ; DYN-DENORMAL-NEXT: vmovss %xmm0, (%rdi)
- ; DYN-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_dynamic:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_dynamic:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_dynamic:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
store float %canonicalized, float addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) {
- ; DYN-DENORMAL-LABEL: .LCPI10_0:
- ; DYN-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308
- ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f64_dynamic:
- ; DYN-DENORMAL: # %bb.0:
- ; DYN-DENORMAL-NEXT: vmovsd .LCPI10_0(%rip), %xmm0
- ; DYN-DENORMAL-NEXT: vmovsd %xmm0, (%rdi)
- ; DYN-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_dynamic:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_dynamic:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_dynamic:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
+; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI11_0:
- ; CHECK: .long 0x80000000 # float -0
- ; CHECK-LABEL: canonicalize_denormal1_bfloat_pre_sign:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0
- ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
- ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
store bfloat %canonicalized, bfloat addrspace(1)* %out
ret void
@@ -174,15 +1147,80 @@ define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) {
define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) {
- ; IEEE-DENORMAL-LABEL: .LCPI12_0:
- ; IEEE-DENORMAL: .long 0x80000000 # float -0
- ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_bfloat_ieee:
- ; IEEE-DENORMAL: # %bb.0:
- ; IEEE-DENORMAL-NEXT: vmovss .LCPI12_0(%rip), %xmm0
- ; IEEE-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0
- ; IEEE-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi)
- ; IEEE-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_ieee:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
store bfloat %canonicalized, bfloat addrspace(1)* %out
ret void
@@ -190,29 +1228,178 @@ define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) {
define void @canonicalize_denormal1_bfloat_dynamic(bfloat addrspace(1)* %out) {
- ; DYN-DENORMAL-LABEL: .LCPI13_0:
- ; DYN-DENORMAL: .long 0x80000000 # float -0
- ; DYN-DENORMAL-LABEL: canonicalize_denormal1_bfloat_dynamic:
- ; DYN-DENORMAL: # %bb.0:
- ; DYN-DENORMAL-NEXT: vmovss .LCPI13_0(%rip), %xmm0
- ; DYN-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0
- ; DYN-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi)
- ; DYN-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
store bfloat %canonicalized, bfloat addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) {
- ; CHECK-LABEL: .LCPI14_0:
- ; CHECK: .short 0x8000 # half -0
- ; CHECK-LABEL: canonicalize_denormal1_half_pre_sign:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0
- ; CHECK-NEXT: vmovsh %xmm0, (%rdi)
- ; CHECK-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_pre_sign:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_pre_sign:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_pre_sign:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
store half %canonicalized, half addrspace(1)* %out
ret void
@@ -220,68 +1407,482 @@ define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) {
define void @canonicalize_denormal1_half_ieee(half addrspace(1)* %out) {
- ; IEEE-DENORMAL-LABEL: .LCPI15_0:
- ; IEEE-DENORMAL: .short 0x8000 # half -0
- ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_half_ieee:
- ; IEEE-DENORMAL: # %bb.0:
- ; IEEE-DENORMAL-NEXT: vmovsh .LCPI15_0(%rip), %xmm0
- ; IEEE-DENORMAL-NEXT: vmovsh %xmm0, (%rdi)
- ; IEEE-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_ieee:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_ieee:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_ieee:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_ieee:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_ieee:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
store half %canonicalized, half addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_half_dynamic(half addrspace(1)* %out) {
- ; DYN-DENORMAL-LABEL: .LCPI16_0:
- ; DYN-DENORMAL: .short 0x8000 # half -0
- ; DYN-DENORMAL-LABEL: canonicalize_denormal1_half_dynamic:
- ; DYN-DENORMAL: # %bb.0:
- ; DYN-DENORMAL-NEXT: vmovsh .LCPI16_0(%rip), %xmm0
- ; DYN-DENORMAL-NEXT: vmovsh %xmm0, (%rdi)
- ; DYN-DENORMAL-NEXT: retq
-
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_dynamic:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_dynamic:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_dynamic:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_dynamic:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
store half %canonicalized, half addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_x86_fp80_pre_sign(x86_fp80 addrspace(1)* %out) {
- ; CHECK-LAEBL: .LCPI17_0:
- ; CHECK: .long 0x00000000 # float 0
- ; CHECK-LAEBL: canonicalize_denormal1_x86_fp80_pre_sign
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: flds .LCPI17_0(%rip)
- ; CHECK-NEXT: fstpt (%rdi)
- ; CHECK-NEXT: retq
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: fldz
+; PRE-SIGN-SSE2-NEXT: fstpt (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: fldz
+; PRE-SIGN-AVX-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: fldz
+; PRE-SIGN-AVX2-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: fldz
+; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: fldz
+; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_x86_fp80_dynamic(x86_fp80 addrspace(1)* %out) {
- ; DYN-DENORMAL-LAEBL: .LCPI17_0:
- ; DYN-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951
- ; DYN-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_dynamic
- ; DYN-DENORMAL: # %bb.0:
- ; DYN-DENORMAL-NEXT: fldt .LCPI17_0(%rip)
- ; DYN-DENORMAL-NEXT: fstpt (%rdi)
- ; DYN-DENORMAL-NEXT: retq
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: fldz
+; PRE-SIGN-SSE2-NEXT: fstpt (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: fldz
+; PRE-SIGN-AVX-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: fldz
+; PRE-SIGN-AVX2-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: fldz
+; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: fldz
+; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
ret void
}
define void @canonicalize_denormal1_x86_fp80_ieee(x86_fp80 addrspace(1)* %out) {
- ; IEEE-DENORMAL-LAEBL: .LCPI17_0:
- ; IEEE-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951
- ; IEEE-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_ieee
- ; IEEE-DENORMAL: # %bb.0:
- ; IEEE-DENORMAL-NEXT: fldt .LCPI17_0(%rip)
- ; IEEE-DENORMAL-NEXT: fstpt (%rdi)
- ; IEEE-DENORMAL-NEXT: retq
+; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; PRE-SIGN-SSE2: # %bb.0:
+; PRE-SIGN-SSE2-NEXT: fldz
+; PRE-SIGN-SSE2-NEXT: fstpt (%rdi)
+; PRE-SIGN-SSE2-NEXT: retq
+;
+; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; IEEE-DENORMAL-SSE2: # %bb.0:
+; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-SSE2-NEXT: retq
+;
+; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; DYN-DENORMAL-SSE2: # %bb.0:
+; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-SSE2-NEXT: retq
+;
+; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; PRE-SIGN-AVX: # %bb.0:
+; PRE-SIGN-AVX-NEXT: fldz
+; PRE-SIGN-AVX-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX-NEXT: retq
+;
+; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; IEEE-DENORMAL-AVX: # %bb.0:
+; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX-NEXT: retq
+;
+; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; DYN-DENORMAL-AVX: # %bb.0:
+; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX-NEXT: retq
+;
+; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; PRE-SIGN-AVX2: # %bb.0:
+; PRE-SIGN-AVX2-NEXT: fldz
+; PRE-SIGN-AVX2-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX2-NEXT: retq
+;
+; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; IEEE-DENORMAL-AVX2: # %bb.0:
+; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX2-NEXT: retq
+;
+; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; DYN-DENORMAL-AVX2: # %bb.0:
+; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX2-NEXT: retq
+;
+; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; PRE-SIGN-AVX512F: # %bb.0:
+; PRE-SIGN-AVX512F-NEXT: fldz
+; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX512F-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; IEEE-DENORMAL-AVX512F: # %bb.0:
+; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX512F-NEXT: retq
+;
+; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; DYN-DENORMAL-AVX512F: # %bb.0:
+; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX512F-NEXT: retq
+;
+; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; PRE-SIGN-AVX512BW: # %bb.0:
+; PRE-SIGN-AVX512BW-NEXT: fldz
+; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi)
+; PRE-SIGN-AVX512BW-NEXT: retq
+;
+; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; IEEE-DENORMAL-AVX512BW: # %bb.0:
+; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
+; IEEE-DENORMAL-AVX512BW-NEXT: retq
+;
+; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee:
+; DYN-DENORMAL-AVX512BW: # %bb.0:
+; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
+; DYN-DENORMAL-AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll
index c1b5dd0dddcd2b..0075386c023618 100644
--- a/llvm/test/CodeGen/X86/canonicalize-vars.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll
@@ -1,14 +1,266 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
-; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s
+; RUN: llc -mattr=+sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE
+; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW
+
+
+define float @canon_fp32_varargsf32(float %a) {
+; SSE-LABEL: canon_fp32_varargsf32:
+; SSE: # %bb.0:
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp32_varargsf32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp32_varargsf32:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp32_varargsf32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp32_varargsf32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp32_varargsf32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: retq
+ %canonicalized = call float @llvm.canonicalize.f32(float %a)
+ ret float %canonicalized
+}
+
+define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) {
+; SSE-LABEL: canon_fp32_varargsf80:
+; SSE: # %bb.0:
+; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp32_varargsf80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp32_varargsf80:
+; AVX: # %bb.0:
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp32_varargsf80:
+; AVX2: # %bb.0:
+; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp32_varargsf80:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp32_varargsf80:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512BW-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a)
+ ret x86_fp80 %canonicalized
+}
+
+define bfloat @canon_fp32_varargsbf16(bfloat %a) {
+; SSE-LABEL: canon_fp32_varargsbf16:
+; SSE: # %bb.0:
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canon_fp32_varargsbf16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp32_varargsbf16:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canon_fp32_varargsbf16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canon_fp32_varargsbf16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canon_fp32_varargsbf16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: retq
+ %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %a)
+ ret bfloat %canonicalized
+}
define half @complex_canonicalize_fmul_half(half %a, half %b) {
-; CHECK-LABEL: complex_canonicalize_fmul_half:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2
-; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0
-; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: complex_canonicalize_fmul_half:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: movss %xmm1, (%rsp) # 4-byte Spill
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
+; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT: subss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: callq __truncsfhf2@PLT
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfhf2@PLT
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfhf2@PLT
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfhf2@PLT
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: complex_canonicalize_fmul_half:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pushq %rax
+; SSE2-NEXT: .cfi_def_cfa_offset 16
+; SSE2-NEXT: movss %xmm1, (%rsp) # 4-byte Spill
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE2-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
+; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: subss %xmm0, %xmm1
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: callq __truncsfhf2@PLT
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfhf2@PLT
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfhf2@PLT
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfhf2@PLT
+; SSE2-NEXT: popq %rax
+; SSE2-NEXT: .cfi_def_cfa_offset 8
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: complex_canonicalize_fmul_half:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload
+; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: complex_canonicalize_fmul_half:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX2-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload
+; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX2-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX2-NEXT: vsubss %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: callq __truncsfhf2@PLT
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfhf2@PLT
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfhf2@PLT
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfhf2@PLT
+; AVX2-NEXT: popq %rax
+; AVX2-NEXT: .cfi_def_cfa_offset 8
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: complex_canonicalize_fmul_half:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpextrw $0, %xmm1, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %eax, %xmm1
+; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
+; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512F-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm0, %eax
+; AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: complex_canonicalize_fmul_half:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm0
+; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %eax, %xmm1
+; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1
+; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX512BW-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512BW-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
entry:
%mul1 = fsub half %a, %b
@@ -20,17 +272,72 @@ entry:
}
define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
+; SSE-LABEL: complex_canonicalize_fmul_x86_fp80:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE-NEXT: fsub %st(1), %st
+; SSE-NEXT: fld %st(0)
+; SSE-NEXT: fadd %st(2), %st
+; SSE-NEXT: fsubp %st, %st(1)
+; SSE-NEXT: fsubp %st, %st(1)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: complex_canonicalize_fmul_x86_fp80:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fsub %st(1), %st
+; SSE2-NEXT: fld %st(0)
+; SSE2-NEXT: fadd %st(2), %st
+; SSE2-NEXT: fsubp %st, %st(1)
+; SSE2-NEXT: fsubp %st, %st(1)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: complex_canonicalize_fmul_x86_fp80:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fsub %st(1), %st
+; AVX-NEXT: fld %st(0)
+; AVX-NEXT: fadd %st(2), %st
+; AVX-NEXT: fsubp %st, %st(1)
+; AVX-NEXT: fsubp %st, %st(1)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: complex_canonicalize_fmul_x86_fp80:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX2-NEXT: fsub %st(1), %st
+; AVX2-NEXT: fld %st(0)
+; AVX2-NEXT: fadd %st(2), %st
+; AVX2-NEXT: fsubp %st, %st(1)
+; AVX2-NEXT: fsubp %st, %st(1)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: complex_canonicalize_fmul_x86_fp80:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fsub %st(1), %st
+; AVX512F-NEXT: fld %st(0)
+; AVX512F-NEXT: fadd %st(2), %st
+; AVX512F-NEXT: fsubp %st, %st(1)
+; AVX512F-NEXT: fsubp %st, %st(1)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: complex_canonicalize_fmul_x86_fp80:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512BW-NEXT: fsub %st(1), %st
+; AVX512BW-NEXT: fld %st(0)
+; AVX512BW-NEXT: fadd %st(2), %st
+; AVX512BW-NEXT: fsubp %st, %st(1)
+; AVX512BW-NEXT: fsubp %st, %st(1)
+; AVX512BW-NEXT: retq
entry:
- ; CHECK-LABEL: complex_canonicalize_fmul_x86_fp80
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: fldt 24(%rsp)
- ; CHECK-NEXT: fldt 8(%rsp)
- ; CHECK-NEXT: fsub %st(1), %st
- ; CHECK-NEXT: fld %st(0)
- ; CHECK-NEXT: fadd %st(2), %st
- ; CHECK-NEXT: fsubp %st, %st(1)
- ; CHECK-NEXT: fsubp %st, %st(1)
- ; CHECK-NEXT: retq
%mul1 = fsub x86_fp80 %a, %b
%add = fadd x86_fp80 %mul1, %b
@@ -41,39 +348,203 @@ entry:
}
define bfloat @complex_canonicalize_fmul_bfloat(bfloat %a, bfloat %b) {
-; CHECK-LABEL: complex_canonicalize_fmul_bfloat:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovw %xmm0, %eax
-; CHECK-NEXT: vmovw %xmm1, %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: vmovd %ecx, %xmm0
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: vmovd %eax, %xmm1
-; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1
-; CHECK-NEXT: vmovw %xmm1, %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: vmovd %eax, %xmm1
-; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm2
-; CHECK-NEXT: vcvtneps2bf16 %xmm2, %xmm2
-; CHECK-NEXT: vmovw %xmm2, %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: vmovd %eax, %xmm2
-; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm1
-; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1
-; CHECK-NEXT: vmovw %xmm1, %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: vmovd %eax, %xmm1
-; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1
-; CHECK-NEXT: vmovw %xmm1, %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: vmovd %eax, %xmm1
-; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
-; CHECK-NEXT: vmovw %xmm0, %eax
-; CHECK-NEXT: vmovw %eax, %xmm0
-; CHECK-NEXT: retq
-
+; SSE-LABEL: complex_canonicalize_fmul_bfloat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: pextrw $0, %xmm1, %ecx
+; SSE-NEXT: shll $16, %ecx
+; SSE-NEXT: movd %ecx, %xmm1
+; SSE-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill
+; SSE-NEXT: shll $16, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: callq __truncsfbf2 at PLT
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: shll $16, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfbf2 at PLT
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: shll $16, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfbf2 at PLT
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: shll $16, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfbf2 at PLT
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: complex_canonicalize_fmul_bfloat:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pushq %rax
+; SSE2-NEXT: .cfi_def_cfa_offset 16
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: pextrw $0, %xmm1, %ecx
+; SSE2-NEXT: shll $16, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill
+; SSE2-NEXT: shll $16, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: subss %xmm1, %xmm0
+; SSE2-NEXT: callq __truncsfbf2 at PLT
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: shll $16, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfbf2 at PLT
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: shll $16, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfbf2 at PLT
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: shll $16, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfbf2 at PLT
+; SSE2-NEXT: popq %rax
+; SSE2-NEXT: .cfi_def_cfa_offset 8
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: complex_canonicalize_fmul_bfloat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vpextrw $0, %xmm0, %eax
+; AVX-NEXT: vpextrw $0, %xmm1, %ecx
+; AVX-NEXT: shll $16, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
+; AVX-NEXT: shll $16, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: callq __truncsfbf2 at PLT
+; AVX-NEXT: vpextrw $0, %xmm0, %eax
+; AVX-NEXT: shll $16, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfbf2 at PLT
+; AVX-NEXT: vpextrw $0, %xmm0, %eax
+; AVX-NEXT: shll $16, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfbf2 at PLT
+; AVX-NEXT: vpextrw $0, %xmm0, %eax
+; AVX-NEXT: shll $16, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfbf2 at PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: complex_canonicalize_fmul_bfloat:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: vpextrw $0, %xmm0, %eax
+; AVX2-NEXT: vpextrw $0, %xmm1, %ecx
+; AVX2-NEXT: shll $16, %ecx
+; AVX2-NEXT: vmovd %ecx, %xmm1
+; AVX2-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
+; AVX2-NEXT: shll $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: callq __truncsfbf2 at PLT
+; AVX2-NEXT: vpextrw $0, %xmm0, %eax
+; AVX2-NEXT: shll $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfbf2 at PLT
+; AVX2-NEXT: vpextrw $0, %xmm0, %eax
+; AVX2-NEXT: shll $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfbf2 at PLT
+; AVX2-NEXT: vpextrw $0, %xmm0, %eax
+; AVX2-NEXT: shll $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfbf2 at PLT
+; AVX2-NEXT: popq %rax
+; AVX2-NEXT: .cfi_def_cfa_offset 8
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: complex_canonicalize_fmul_bfloat:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: pushq %rax
+; AVX512F-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm1, %ecx
+; AVX512F-NEXT: shll $16, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm1
+; AVX512F-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: callq __truncsfbf2 at PLT
+; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX512F-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX512F-NEXT: callq __truncsfbf2 at PLT
+; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX512F-NEXT: callq __truncsfbf2 at PLT
+; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX512F-NEXT: callq __truncsfbf2 at PLT
+; AVX512F-NEXT: popq %rax
+; AVX512F-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: complex_canonicalize_fmul_bfloat:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: pushq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 16
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512BW-NEXT: vpextrw $0, %xmm1, %ecx
+; AVX512BW-NEXT: shll $16, %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm1
+; AVX512BW-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
+; AVX512BW-NEXT: shll $16, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: callq __truncsfbf2 at PLT
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512BW-NEXT: shll $16, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX512BW-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX512BW-NEXT: callq __truncsfbf2 at PLT
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512BW-NEXT: shll $16, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX512BW-NEXT: callq __truncsfbf2 at PLT
+; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
+; AVX512BW-NEXT: shll $16, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm0
+; AVX512BW-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX512BW-NEXT: callq __truncsfbf2 at PLT
+; AVX512BW-NEXT: popq %rax
+; AVX512BW-NEXT: .cfi_def_cfa_offset 8
+; AVX512BW-NEXT: retq
entry:
%sub1 = fsub bfloat %a, %b
@@ -85,14 +556,60 @@ entry:
}
define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
+; SSE-LABEL: canonicalize_fp64:
+; SSE: # %bb.0: # %start
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: cmpunordsd %xmm0, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: maxsd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm1, %xmm2
+; SSE-NEXT: orpd %xmm3, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canonicalize_fp64:
+; SSE2: # %bb.0: # %start
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm3
+; SSE2-NEXT: andpd %xmm1, %xmm3
+; SSE2-NEXT: maxsd %xmm0, %xmm1
+; SSE2-NEXT: andnpd %xmm1, %xmm2
+; SSE2-NEXT: orpd %xmm3, %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canonicalize_fp64:
+; AVX: # %bb.0: # %start
+; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canonicalize_fp64:
+; AVX2: # %bb.0: # %start
+; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canonicalize_fp64:
+; AVX512F: # %bb.0: # %start
+; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512F-NEXT: vmovapd %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canonicalize_fp64:
+; AVX512BW: # %bb.0: # %start
+; AVX512BW-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; AVX512BW-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512BW-NEXT: vmovapd %xmm2, %xmm0
+; AVX512BW-NEXT: retq
start:
- ; CHECK-LABEL: canonicalize_fp64:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
- ; CHECK-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
- ; CHECK-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
- ; CHECK-NEXT: vmovapd %xmm2, %xmm0
- ; CHECK-NEXT: retq
%c = fcmp olt double %a, %b
%d = fcmp uno double %a, 0.000000e+00
@@ -103,14 +620,60 @@ start:
}
define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
+; SSE-LABEL: canonicalize_fp32:
+; SSE: # %bb.0: # %start
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: cmpunordss %xmm0, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm3
+; SSE-NEXT: andps %xmm1, %xmm3
+; SSE-NEXT: maxss %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm1, %xmm2
+; SSE-NEXT: orps %xmm3, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: canonicalize_fp32:
+; SSE2: # %bb.0: # %start
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: cmpunordss %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm3
+; SSE2-NEXT: andps %xmm1, %xmm3
+; SSE2-NEXT: maxss %xmm0, %xmm1
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm3, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canonicalize_fp32:
+; AVX: # %bb.0: # %start
+; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: canonicalize_fp32:
+; AVX2: # %bb.0: # %start
+; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canonicalize_fp32:
+; AVX512F: # %bb.0: # %start
+; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512F-NEXT: vmovaps %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canonicalize_fp32:
+; AVX512BW: # %bb.0: # %start
+; AVX512BW-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %xmm2, %xmm0
+; AVX512BW-NEXT: retq
start:
- ; CHECK-LABEL: canonicalize_fp32:
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmaxss %xmm0, %xmm1, %xmm2
- ; CHECK-NEXT: vcmpunordss %xmm0, %xmm0, %k1
- ; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
- ; CHECK-NEXT: vmovaps %xmm2, %xmm0
- ; CHECK-NEXT: retq
%cc = fcmp olt float %aa, %bb
%dd = fcmp uno float %aa, 0.000000e+00
@@ -121,11 +684,47 @@ start:
}
define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: v_test_canonicalize_var_f32
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovss (%rdi), %xmm0
- ; CHECK-NEXT: vmovss %xmm0, (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize_var_f32:
+; SSE: # %bb.0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; SSE-NEXT: mulss (%rdi), %xmm0
+; SSE-NEXT: movss %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_var_f32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; SSE2-NEXT: mulss (%rdi), %xmm0
+; SSE2-NEXT: movss %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_var_f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovss %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_var_f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX2-NEXT: vmulss (%rdi), %xmm0, %xmm0
+; AVX2-NEXT: vmovss %xmm0, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_var_f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512F-NEXT: vmulss (%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: vmovss %xmm0, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_var_f32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512BW-NEXT: vmulss (%rdi), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovss %xmm0, (%rdi)
+; AVX512BW-NEXT: retq
%val = load float, float addrspace(1)* %out
%canonicalized = call float @llvm.canonicalize.f32(float %val)
store float %canonicalized, float addrspace(1)* %out
@@ -133,11 +732,53 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
}
define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: v_test_canonicalize_x86_fp80
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: fldt (%rdi)
- ; CHECK-NEXT: fstpt (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize_x86_fp80:
+; SSE: # %bb.0:
+; SSE-NEXT: fldt (%rdi)
+; SSE-NEXT: fld1
+; SSE-NEXT: fmulp %st, %st(1)
+; SSE-NEXT: fstpt (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_x86_fp80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: fldt (%rdi)
+; SSE2-NEXT: fld1
+; SSE2-NEXT: fmulp %st, %st(1)
+; SSE2-NEXT: fstpt (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_x86_fp80:
+; AVX: # %bb.0:
+; AVX-NEXT: fldt (%rdi)
+; AVX-NEXT: fld1
+; AVX-NEXT: fmulp %st, %st(1)
+; AVX-NEXT: fstpt (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_x86_fp80:
+; AVX2: # %bb.0:
+; AVX2-NEXT: fldt (%rdi)
+; AVX2-NEXT: fld1
+; AVX2-NEXT: fmulp %st, %st(1)
+; AVX2-NEXT: fstpt (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_x86_fp80:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: fldt (%rdi)
+; AVX512F-NEXT: fld1
+; AVX512F-NEXT: fmulp %st, %st(1)
+; AVX512F-NEXT: fstpt (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_x86_fp80:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: fldt (%rdi)
+; AVX512BW-NEXT: fld1
+; AVX512BW-NEXT: fmulp %st, %st(1)
+; AVX512BW-NEXT: fstpt (%rdi)
+; AVX512BW-NEXT: retq
%val = load x86_fp80, x86_fp80 addrspace(1)* %out
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
@@ -145,11 +786,127 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
}
define void @v_test_canonicalize__half(half addrspace(1)* %out) {
-; CHECK-LABEL: v_test_canonicalize__half:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovsh (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vmovsh %xmm0, (%rdi)
-; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize__half:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: subq $16, %rsp
+; SSE-NEXT: .cfi_def_cfa_offset 32
+; SSE-NEXT: .cfi_offset %rbx, -16
+; SSE-NEXT: movq %rdi, %rbx
+; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
+; SSE-NEXT: callq __extendhfsf2 at PLT
+; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: callq __extendhfsf2 at PLT
+; SSE-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfhf2 at PLT
+; SSE-NEXT: pextrw $0, %xmm0, %eax
+; SSE-NEXT: movw %ax, (%rbx)
+; SSE-NEXT: addq $16, %rsp
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize__half:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: .cfi_def_cfa_offset 16
+; SSE2-NEXT: subq $16, %rsp
+; SSE2-NEXT: .cfi_def_cfa_offset 32
+; SSE2-NEXT: .cfi_offset %rbx, -16
+; SSE2-NEXT: movq %rdi, %rbx
+; SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
+; SSE2-NEXT: callq __extendhfsf2 at PLT
+; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: callq __extendhfsf2 at PLT
+; SSE2-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfhf2 at PLT
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: movw %ax, (%rbx)
+; SSE2-NEXT: addq $16, %rsp
+; SSE2-NEXT: .cfi_def_cfa_offset 16
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: .cfi_def_cfa_offset 8
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize__half:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: subq $16, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: .cfi_offset %rbx, -16
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
+; AVX-NEXT: callq __extendhfsf2 at PLT
+; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: callq __extendhfsf2 at PLT
+; AVX-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfhf2 at PLT
+; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
+; AVX-NEXT: addq $16, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize__half:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: subq $16, %rsp
+; AVX2-NEXT: .cfi_def_cfa_offset 32
+; AVX2-NEXT: .cfi_offset %rbx, -16
+; AVX2-NEXT: movq %rdi, %rbx
+; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
+; AVX2-NEXT: callq __extendhfsf2 at PLT
+; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: callq __extendhfsf2 at PLT
+; AVX2-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfhf2 at PLT
+; AVX2-NEXT: vpextrw $0, %xmm0, (%rbx)
+; AVX2-NEXT: addq $16, %rsp
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: .cfi_def_cfa_offset 8
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize__half:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: movzwl (%rdi), %eax
+; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %eax, %xmm1
+; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
+; AVX512F-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm0, %eax
+; AVX512F-NEXT: movw %ax, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize__half:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: movzwl (%rdi), %eax
+; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm0
+; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %eax, %xmm1
+; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1
+; AVX512BW-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: movw %ax, (%rdi)
+; AVX512BW-NEXT: retq
entry:
%val = load half, half addrspace(1)* %out
%canonicalized = call half @llvm.canonicalize.f16(half %val)
@@ -158,11 +915,47 @@ entry:
}
define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
- ; CHECK-LAEBL: v_test_canonicalize_var_f64
- ; CHECK: # %bb.0:
- ; CHECK-NEXT: vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
- ; CHECK-NEXT: vmovsd %xmm0, (%rdi)
- ; CHECK-NEXT: retq
+; SSE-LABEL: v_test_canonicalize_var_f64:
+; SSE: # %bb.0:
+; SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; SSE-NEXT: mulsd (%rdi), %xmm0
+; SSE-NEXT: movsd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize_var_f64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; SSE2-NEXT: mulsd (%rdi), %xmm0
+; SSE2-NEXT: movsd %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_var_f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovsd %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize_var_f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; AVX2-NEXT: vmulsd (%rdi), %xmm0, %xmm0
+; AVX2-NEXT: vmovsd %xmm0, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize_var_f64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; AVX512F-NEXT: vmulsd (%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: vmovsd %xmm0, (%rdi)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize_var_f64:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; AVX512BW-NEXT: vmulsd (%rdi), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovsd %xmm0, (%rdi)
+; AVX512BW-NEXT: retq
%val = load double, double addrspace(1)* %out
%canonicalized = call double @llvm.canonicalize.f64(double %val)
store double %canonicalized, double addrspace(1)* %out
@@ -170,15 +963,29 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
}
define void @v_test_canonicalize__bfloat(bfloat addrspace(1)* %out) {
-; CHECK-LABEL: v_test_canonicalize__bfloat:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movzwl (%rdi), %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
-; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
-; CHECK-NEXT: retq
-
+; SSE-LABEL: v_test_canonicalize__bfloat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: retq
+;
+; SSE2-LABEL: v_test_canonicalize__bfloat:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize__bfloat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: v_test_canonicalize__bfloat:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: v_test_canonicalize__bfloat:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: v_test_canonicalize__bfloat:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: retq
entry:
%val = load bfloat, bfloat addrspace(1)* %out
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val)
@@ -190,4 +997,4 @@ declare double @llvm.canonicalize.f64(double)
declare float @llvm.canonicalize.f32(float)
declare bfloat @llvm.canonicalize.bf16(bfloat)
declare x86_fp80 @llvm.canonicalize.f80(x86_fp80)
-declare half @llvm.canonicalize.f16(half)
\ No newline at end of file
+declare half @llvm.canonicalize.f16(half)
>From d40523083c236995400c5d44444fdfdd20560d71 Mon Sep 17 00:00:00 2001
From: Pawan Anil Nirpal <pawan.anil.nirpal at intel.com>
Date: Fri, 6 Sep 2024 10:49:32 +0200
Subject: [PATCH 3/5] addressing review comments, simplify conditions
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4b9f0326e4d46d..c9227be5d6c297 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58077,14 +58077,12 @@ SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) {
DAG.getMachineFunction().getDenormalMode(C.getSemantics());
assert((Mode != DenormalMode::getPositiveZero()) &&
"Positive denormal mode is not valid for X86 target.");
- if (Mode == DenormalMode::getPreserveSign()) {
- SDValue SDZero =
- DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT);
- return SDZero;
- } else if (Mode == DenormalMode::getIEEE()) {
+ if (Mode == DenormalMode::getPreserveSign())
+ return DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT);
+ if (Mode == DenormalMode::getIEEE() || Mode == DenormalMode::getDynamic())
return Operand;
- }
- } else if (C.isNaN() && C.isSignaling()) {
+ }
+ if (C.isNaN() && C.isSignaling()) {
APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
return QuitNaN;
@@ -58094,7 +58092,7 @@ SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) {
}
SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) {
- assert(N!=nullptr && "Trying to find last chain for a NULL Node");
+ assert(N != nullptr && "Trying to find last chain for a NULL Node");
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
if (Op.getValueType() == MVT::Other && Op.getNode()->isStrictFPOpcode())
@@ -58104,10 +58102,10 @@ SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) {
}
bool isNonCanonicalizingOperation(SDNode *N) {
- assert(N!=nullptr && "Trying to check canonical opcode for a NULL Node");
+ assert(N != nullptr && "Trying to check canonical opcode for a NULL Node");
unsigned Opc = N->getOpcode();
switch (Opc) {
- // Ensure these are the exasustive set of non canonicalizing opcodes. Add more
+ // Ensure these are the exhaustive set of non canonicalizing opcodes. Add more
// if not.
case X86::RET:
case ISD::STORE:
>From 317dd6f68da03de47fe525fbc5453494ec16c059 Mon Sep 17 00:00:00 2001
From: Pawan Anil Nirpal <pawan.anil.nirpal at intel.com>
Date: Tue, 10 Sep 2024 15:10:50 +0200
Subject: [PATCH 4/5] Removed constant folding for another patch, moving undef
canonicalize to generic dag combiner
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 94 +-
.../CodeGen/X86/canonicalize-constants.ll | 583 -----
.../CodeGen/X86/canonicalize-subnormals.ll | 1888 -----------------
llvm/test/CodeGen/X86/canonicalize-vars.ll | 441 ++--
5 files changed, 164 insertions(+), 2852 deletions(-)
delete mode 100644 llvm/test/CodeGen/X86/canonicalize-constants.ll
delete mode 100644 llvm/test/CodeGen/X86/canonicalize-subnormals.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 37272a09b336ab..ef989ea3190274 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1980,6 +1980,16 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FREEZE: return visitFREEZE(N);
case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
+ case ISD::FCANONICALIZE:{
+ SDValue Operand = N->getOperand(0);
+ EVT VT = Operand.getValueType();
+ SDLoc dl(N);
+ if(Operand.isUndef()){
+ APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
+ return DAG.getConstantFP(CanonicalQNaN, dl, VT);
+ }
+ break;
+ }
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9227be5d6c297..8dcb52ac611d64 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58066,81 +58066,11 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) {
- SDValue Operand = Node->getOperand(0);
- SDLoc dl(Node);
- EVT VT = Operand.getValueType();
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Operand)) {
- const APFloat &C = CFP->getValueAPF();
- if (C.isDenormal()) {
- DenormalMode Mode =
- DAG.getMachineFunction().getDenormalMode(C.getSemantics());
- assert((Mode != DenormalMode::getPositiveZero()) &&
- "Positive denormal mode is not valid for X86 target.");
- if (Mode == DenormalMode::getPreserveSign())
- return DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT);
- if (Mode == DenormalMode::getIEEE() || Mode == DenormalMode::getDynamic())
- return Operand;
- }
- if (C.isNaN() && C.isSignaling()) {
- APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
- SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
- return QuitNaN;
- }
- }
- return Operand;
-}
-
-SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) {
- assert(N != nullptr && "Trying to find last chain for a NULL Node");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- SDValue Op = N->getOperand(i);
- if (Op.getValueType() == MVT::Other && Op.getNode()->isStrictFPOpcode())
- return Op;
- }
- return DAG.getEntryNode();
-}
-
-bool isNonCanonicalizingOperation(SDNode *N) {
- assert(N != nullptr && "Trying to check canonical opcode for a NULL Node");
- unsigned Opc = N->getOpcode();
- switch (Opc) {
- // Ensure these are the exhaustive set of non canonicalizing opcodes. Add more
- // if not.
- case X86::RET:
- case ISD::STORE:
- case ISD::SETCC:
- case X86ISD::FCMP:
- return true;
- default:
- return false;
- }
-}
-
-bool isUsedByNonCanonicalizingOp(SDNode *N) {
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
- ++UI) {
- SDNode *User = *UI;
- if (isNonCanonicalizingOperation(User))
- return true;
- }
- return false;
-}
-
SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) {
SDValue Operand = Node->getOperand(0);
EVT VT = Operand.getValueType();
SDLoc dl(Node);
- if (auto *CFP = dyn_cast<ConstantFPSDNode>(Operand))
- return combineConstantCanonicalize(Node, DAG);
-
- if (Operand.isUndef()) {
- APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
- SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
- return QuitNaN;
- }
-
// Canonicalize scalar variable FP Nodes.
SDValue MulNode;
SDValue One;
@@ -58157,28 +58087,18 @@ SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) {
One = DAG.getConstantFP(Val, dl, VT);
} else {
// Is it better to assert? when we encounter an unknown FP type,Than to
- // just replace with the operand! As this might be our last attempt at
- // legalization.
+ // just replace with the operand!
return Operand;
}
- // Store, return, and compare are non-canonicalizing operations. If a
- // non-canonicalizing operation uses the rest then mul * 1.0 must be generated
- // int those cases.
- // TODO: For now Preventing bf16 from generating strict_fmul as it
- // leads to a crash SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
+ // TODO: Fix crash for bf16 when generating strict_fmul, as it
+ // leads to an error: SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
// promote this operator's result!
- if (isUsedByNonCanonicalizingOp(Node) && VT != MVT::bf16) {
- SDValue Chain = findLastStrictOpChain(Node, DAG);
- // TODO : Follow-up with tablegen pattern to generate mul * 1.0.
- SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, One, Operand});
-
- return StrictFmul;
- }
-
- return Operand;
+ SDValue Chain = DAG.getEntryNode();
+ SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
+ {Chain, One, Operand});
+ return StrictFmul;
// TODO : Hanlde vectors.
}
diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll
deleted file mode 100644
index b1a9733806d40e..00000000000000
--- a/llvm/test/CodeGen/X86/canonicalize-constants.ll
+++ /dev/null
@@ -1,583 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
-; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE
-; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2
-; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX
-; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2
-; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F
-; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW
-
-define float @canon_fp32() {
-; SSE-LABEL: canon_fp32:
-; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: canon_fp32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canon_fp32:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp32:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX512BW-NEXT: retq
- %canonicalized = call float @llvm.canonicalize.f32(float 3.0)
- ret float %canonicalized
-}
-
-define half @canon_fp16() {
-; SSE-LABEL: canon_fp16:
-; SSE: # %bb.0:
-; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: canon_fp16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canon_fp16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: retq
- %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0
- ret half %canonicalized
-}
-
-define double @canon_fp64() {
-; SSE-LABEL: canon_fp64:
-; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: canon_fp64:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canon_fp64:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp64:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double 3.0)
- ret double %canonicalized
-}
-
-define x86_fp80 @canon_fp80() {
-; SSE-LABEL: canon_fp80:
-; SSE: # %bb.0:
-; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: canon_fp80:
-; SSE2: # %bb.0:
-; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canon_fp80:
-; AVX: # %bb.0:
-; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp80:
-; AVX2: # %bb.0:
-; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp80:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp80:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512BW-NEXT: retq
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0
- ret x86_fp80 %canonicalized
-}
-
-
-define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
-; SSE-LABEL: complex_canonicalize_x86_fp80:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
-; SSE-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: complex_canonicalize_x86_fp80:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
-; SSE2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: complex_canonicalize_x86_fp80:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: complex_canonicalize_x86_fp80:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: complex_canonicalize_x86_fp80:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: complex_canonicalize_x86_fp80:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512BW-NEXT: retq
-entry:
- %mul1 = fsub x86_fp80 %a, %b
- %add = fadd x86_fp80 %mul1, %b
- %mul2 = fsub x86_fp80 %add, %mul1
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
- %result = fsub x86_fp80 %canonicalized, %b
- ret x86_fp80 %result
-}
-
-define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
-; SSE-LABEL: complex_canonicalize_fp64:
-; SSE: # %bb.0: # %start
-; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: complex_canonicalize_fp64:
-; SSE2: # %bb.0: # %start
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: complex_canonicalize_fp64:
-; AVX: # %bb.0: # %start
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: complex_canonicalize_fp64:
-; AVX2: # %bb.0: # %start
-; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: complex_canonicalize_fp64:
-; AVX512F: # %bb.0: # %start
-; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: complex_canonicalize_fp64:
-; AVX512BW: # %bb.0: # %start
-; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
-; AVX512BW-NEXT: retq
-start:
- %c = fcmp olt double %a, %b
- %d = fcmp uno double %a, 0.000000e+00
- %or.cond.i.i = or i1 %d, %c
- %e = select i1 %or.cond.i.i, double %b, double %a
- %f = tail call double @llvm.canonicalize.f64(double 3.0) #2
- ret double %f
-}
-
-define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
-; SSE-LABEL: test_fold_canonicalize_p0_f32:
-; SSE: # %bb.0:
-; SSE-NEXT: movl $0, (%rdi)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: test_fold_canonicalize_p0_f32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movl $0, (%rdi)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: test_fold_canonicalize_p0_f32:
-; AVX: # %bb.0:
-; AVX-NEXT: movl $0, (%rdi)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: test_fold_canonicalize_p0_f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl $0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: test_fold_canonicalize_p0_f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movl $0, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_fold_canonicalize_p0_f32:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: movl $0, (%rdi)
-; AVX512BW-NEXT: retq
- %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
- store float %canonicalized, float addrspace(1)* %out
- ret void
-}
-
-define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
-; SSE-LABEL: test_fold_canonicalize_n0_f32:
-; SSE: # %bb.0:
-; SSE-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: test_fold_canonicalize_n0_f32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: test_fold_canonicalize_n0_f32:
-; AVX: # %bb.0:
-; AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: test_fold_canonicalize_n0_f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: test_fold_canonicalize_n0_f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_fold_canonicalize_n0_f32:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; AVX512BW-NEXT: retq
- %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
- store float %canonicalized, float addrspace(1)* %out
- ret void
-}
-
-
-define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
-; SSE-LABEL: v_test_canonicalize_p90_x86_fp80:
-; SSE: # %bb.0:
-; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE-NEXT: fstpt (%rdi)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_p90_x86_fp80:
-; SSE2: # %bb.0:
-; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE2-NEXT: fstpt (%rdi)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_p90_x86_fp80:
-; AVX: # %bb.0:
-; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX-NEXT: fstpt (%rdi)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_p90_x86_fp80:
-; AVX2: # %bb.0:
-; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX2-NEXT: fstpt (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_p90_x86_fp80:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512F-NEXT: fstpt (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_p90_x86_fp80:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512BW-NEXT: fstpt (%rdi)
-; AVX512BW-NEXT: retq
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
- store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
- ret void
-}
-
-define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) {
-; SSE-LABEL: v_test_canonicalize_p3__half:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: movw %ax, (%rdi)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_p3__half:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: movw %ax, (%rdi)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_p3__half:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_p3__half:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_p3__half:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_p3__half:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX512BW-NEXT: retq
-entry:
- %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200)
- store half %canonicalized, half addrspace(1)* %out
- ret void
-}
-
-define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 {
-; SSE-LABEL: v_test_canonicalize_p3_f64:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
-; SSE-NEXT: movq %rax, (%rdi)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_p3_f64:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
-; SSE2-NEXT: movq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_p3_f64:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
-; AVX-NEXT: movq %rax, (%rdi)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_p3_f64:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
-; AVX2-NEXT: movq %rax, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_p3_f64:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
-; AVX512F-NEXT: movq %rax, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_p3_f64:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
-; AVX512BW-NEXT: movq %rax, (%rdi)
-; AVX512BW-NEXT: retq
-entry:
- %canonicalized = call double @llvm.canonicalize.f64(double 3.0)
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) {
-; SSE-LABEL: v_test_canonicalize_p3__bfloat:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movw $16448, (%rdi) # imm = 0x4040
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_p3__bfloat:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movw $16448, (%rdi) # imm = 0x4040
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_p3__bfloat:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: movw $16448, (%rdi) # imm = 0x4040
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_p3__bfloat:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movw $16448, (%rdi) # imm = 0x4040
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_p3__bfloat:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movw $16448, (%rdi) # imm = 0x4040
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_p3__bfloat:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movw $16448, (%rdi) # imm = 0x4040
-; AVX512BW-NEXT: retq
-entry:
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0)
- store bfloat %canonicalized, bfloat addrspace(1)* %out
- ret void
-}
-
-define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) {
-; SSE-LABEL: v_test_canonicalize_n3__bfloat:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movw $-16320, (%rdi) # imm = 0xC040
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_n3__bfloat:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movw $-16320, (%rdi) # imm = 0xC040
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_n3__bfloat:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: movw $-16320, (%rdi) # imm = 0xC040
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_n3__bfloat:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movw $-16320, (%rdi) # imm = 0xC040
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_n3__bfloat:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: movw $-16320, (%rdi) # imm = 0xC040
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_n3__bfloat:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: movw $-16320, (%rdi) # imm = 0xC040
-; AVX512BW-NEXT: retq
-entry:
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0)
- store bfloat %canonicalized, bfloat addrspace(1)* %out
- ret void
-}
-
-define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
-; SSE-LABEL: v_test_canonicalize_n90_x86_fp80:
-; SSE: # %bb.0:
-; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE-NEXT: fstpt (%rdi)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_n90_x86_fp80:
-; SSE2: # %bb.0:
-; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; SSE2-NEXT: fstpt (%rdi)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_n90_x86_fp80:
-; AVX: # %bb.0:
-; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX-NEXT: fstpt (%rdi)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_n90_x86_fp80:
-; AVX2: # %bb.0:
-; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX2-NEXT: fstpt (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_n90_x86_fp80:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512F-NEXT: fstpt (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_n90_x86_fp80:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; AVX512BW-NEXT: fstpt (%rdi)
-; AVX512BW-NEXT: retq
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000)
- store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
- ret void
-}
-
-define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) {
-; SSE-LABEL: v_test_canonicalize_n3__half:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: movw %ax, (%rdi)
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: v_test_canonicalize_n3__half:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: movw %ax, (%rdi)
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize_n3__half:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_n3__half:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_n3__half:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_n3__half:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; AVX512BW-NEXT: retq
-entry:
- %canonicalized = call half @llvm.canonicalize.f16(half 0xHC200)
- store half %canonicalized, half addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
deleted file mode 100644
index 034da96271eb85..00000000000000
--- a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
+++ /dev/null
@@ -1,1888 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
-; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-SSE2 %s
-; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-SSE2 %s
-; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-SSE2 %s
-; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX %s
-; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX %s
-; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX %s
-; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX2 %s
-; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX2 %s
-; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX2 %s
-; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512F %s
-; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512F %s
-; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512F %s
-; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512BW %s
-; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512BW %s
-; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512BW %s
-
-define double @test_bad_subnormal() {
-; PRE-SIGN-SSE2-LABEL: test_bad_subnormal:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: test_bad_subnormal:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: test_bad_subnormal:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: test_bad_subnormal:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: test_bad_subnormal:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: test_bad_subnormal:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: test_bad_subnormal:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: test_bad_subnormal:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: test_bad_subnormal:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: test_bad_subnormal:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: test_bad_subnormal:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: test_bad_subnormal:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: test_bad_subnormal:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: test_bad_subnormal:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: test_bad_subnormal:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0]
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canon = call double @llvm.canonicalize(double 0x7ff8000000000001) ; Nan
- ret double %canon
-}
-
-define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_pre_sign:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
- store float %canonicalized, float addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_pre_sign:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-
-define void @canonicalize_qnan_f64(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_f64:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_qnan_f64:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_f64:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_f64:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_f64:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_f64:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_f64:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg1_f64:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movq $-1, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movq $-1, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movq $-1, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg1_f64:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movq $-1, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movq $-1, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movq $-1, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg1_f64:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movq $-1, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movq $-1, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movq $-1, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movq $-1, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movq $-1, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg2_f64:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movq $-2, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movq $-2, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movq $-2, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg2_f64:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movq $-2, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movq $-2, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movq $-2, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg2_f64:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movq $-2, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movq $-2, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movq $-2, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movq $-2, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movq $-2, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_snan0_value_f64:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_snan0_value_f64:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_snan0_value_f64:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_snan0_value_f64:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_snan0_value_f64:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_undef(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_undef:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_undef:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_undef:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_undef:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_undef:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_undef:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_undef:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_undef:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_undef:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_undef:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_undef:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_undef:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_undef:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_undef:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_undef:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double undef)
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_ieee:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_ieee:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_ieee:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_ieee:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
- store float %canonicalized, float addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_ieee:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_ieee:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_ieee:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_ieee:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_dynamic:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_dynamic:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_dynamic:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
- store float %canonicalized, float addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_dynamic:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_dynamic:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_dynamic:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF
-; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
- store double %canonicalized, double addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
- store bfloat %canonicalized, bfloat addrspace(1)* %out
- ret void
-}
-
-
-define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_ieee:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
- store bfloat %canonicalized, bfloat addrspace(1)* %out
- ret void
-}
-
-
-define void @canonicalize_denormal1_bfloat_dynamic(bfloat addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
- store bfloat %canonicalized, bfloat addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_pre_sign:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_pre_sign:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_pre_sign:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
- store half %canonicalized, half addrspace(1)* %out
- ret void
-}
-
-
-define void @canonicalize_denormal1_half_ieee(half addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_ieee:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_ieee:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_ieee:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_ieee:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_ieee:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
- store half %canonicalized, half addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_half_dynamic(half addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_dynamic:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_dynamic:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_dynamic:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_dynamic:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half))
- store half %canonicalized, half addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_x86_fp80_pre_sign(x86_fp80 addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: fldz
-; PRE-SIGN-SSE2-NEXT: fstpt (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: fldz
-; PRE-SIGN-AVX-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: fldz
-; PRE-SIGN-AVX2-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: fldz
-; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: fldz
-; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
- store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_x86_fp80_dynamic(x86_fp80 addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: fldz
-; PRE-SIGN-SSE2-NEXT: fstpt (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: fldz
-; PRE-SIGN-AVX-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: fldz
-; PRE-SIGN-AVX2-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: fldz
-; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: fldz
-; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
- store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
- ret void
-}
-
-define void @canonicalize_denormal1_x86_fp80_ieee(x86_fp80 addrspace(1)* %out) {
-; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; PRE-SIGN-SSE2: # %bb.0:
-; PRE-SIGN-SSE2-NEXT: fldz
-; PRE-SIGN-SSE2-NEXT: fstpt (%rdi)
-; PRE-SIGN-SSE2-NEXT: retq
-;
-; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; IEEE-DENORMAL-SSE2: # %bb.0:
-; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-SSE2-NEXT: retq
-;
-; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; DYN-DENORMAL-SSE2: # %bb.0:
-; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-SSE2-NEXT: retq
-;
-; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; PRE-SIGN-AVX: # %bb.0:
-; PRE-SIGN-AVX-NEXT: fldz
-; PRE-SIGN-AVX-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX-NEXT: retq
-;
-; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; IEEE-DENORMAL-AVX: # %bb.0:
-; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX-NEXT: retq
-;
-; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; DYN-DENORMAL-AVX: # %bb.0:
-; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX-NEXT: retq
-;
-; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; PRE-SIGN-AVX2: # %bb.0:
-; PRE-SIGN-AVX2-NEXT: fldz
-; PRE-SIGN-AVX2-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX2-NEXT: retq
-;
-; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; IEEE-DENORMAL-AVX2: # %bb.0:
-; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX2-NEXT: retq
-;
-; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; DYN-DENORMAL-AVX2: # %bb.0:
-; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX2-NEXT: retq
-;
-; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; PRE-SIGN-AVX512F: # %bb.0:
-; PRE-SIGN-AVX512F-NEXT: fldz
-; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX512F-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; IEEE-DENORMAL-AVX512F: # %bb.0:
-; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX512F-NEXT: retq
-;
-; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; DYN-DENORMAL-AVX512F: # %bb.0:
-; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX512F-NEXT: retq
-;
-; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; PRE-SIGN-AVX512BW: # %bb.0:
-; PRE-SIGN-AVX512BW-NEXT: fldz
-; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi)
-; PRE-SIGN-AVX512BW-NEXT: retq
-;
-; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; IEEE-DENORMAL-AVX512BW: # %bb.0:
-; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
-; IEEE-DENORMAL-AVX512BW-NEXT: retq
-;
-; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee:
-; DYN-DENORMAL-AVX512BW: # %bb.0:
-; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi)
-; DYN-DENORMAL-AVX512BW-NEXT: retq
- %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001)
- store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll
index 0075386c023618..a9564496324703 100644
--- a/llvm/test/CodeGen/X86/canonicalize-vars.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll
@@ -10,26 +10,32 @@
define float @canon_fp32_varargsf32(float %a) {
; SSE-LABEL: canon_fp32_varargsf32:
; SSE: # %bb.0:
+; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; SSE2-LABEL: canon_fp32_varargsf32:
; SSE2: # %bb.0:
+; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: canon_fp32_varargsf32:
; AVX: # %bb.0:
+; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: canon_fp32_varargsf32:
; AVX2: # %bb.0:
+; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: canon_fp32_varargsf32:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: canon_fp32_varargsf32:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float %a)
ret float %canonicalized
@@ -39,90 +45,80 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) {
; SSE-LABEL: canon_fp32_varargsf80:
; SSE: # %bb.0:
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE-NEXT: fld1
+; SSE-NEXT: fmulp %st, %st(1)
; SSE-NEXT: retq
;
; SSE2-LABEL: canon_fp32_varargsf80:
; SSE2: # %bb.0:
; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fld1
+; SSE2-NEXT: fmulp %st, %st(1)
; SSE2-NEXT: retq
;
; AVX-LABEL: canon_fp32_varargsf80:
; AVX: # %bb.0:
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fld1
+; AVX-NEXT: fmulp %st, %st(1)
; AVX-NEXT: retq
;
; AVX2-LABEL: canon_fp32_varargsf80:
; AVX2: # %bb.0:
; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX2-NEXT: fld1
+; AVX2-NEXT: fmulp %st, %st(1)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: canon_fp32_varargsf80:
; AVX512F: # %bb.0:
; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fld1
+; AVX512F-NEXT: fmulp %st, %st(1)
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: canon_fp32_varargsf80:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512BW-NEXT: fld1
+; AVX512BW-NEXT: fmulp %st, %st(1)
; AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a)
ret x86_fp80 %canonicalized
}
-define bfloat @canon_fp32_varargsbf16(bfloat %a) {
-; SSE-LABEL: canon_fp32_varargsbf16:
-; SSE: # %bb.0:
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: canon_fp32_varargsbf16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canon_fp32_varargsbf16:
-; AVX: # %bb.0:
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp32_varargsbf16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp32_varargsbf16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp32_varargsbf16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: retq
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %a)
- ret bfloat %canonicalized
-}
-
define half @complex_canonicalize_fmul_half(half %a, half %b) {
; SSE-LABEL: complex_canonicalize_fmul_half:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
-; SSE-NEXT: movss %xmm1, (%rsp) # 4-byte Spill
+; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
+; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
-; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE-NEXT: movss (%rsp), %xmm1 # 4-byte Reload
; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: callq __truncsfhf2@PLT
; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE-NEXT: callq __truncsfhf2@PLT
; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE-NEXT: callq __truncsfhf2@PLT
; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE-NEXT: callq __truncsfhf2@PLT
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE-NEXT: callq __truncsfhf2@PLT
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
@@ -132,27 +128,33 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: movss %xmm1, (%rsp) # 4-byte Spill
+; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE2-NEXT: movss (%rsp), %xmm0 # 4-byte Reload
+; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
-; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload
; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: subss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE2-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload
+; SSE2-NEXT: callq __truncsfhf2@PLT
+; SSE2-NEXT: callq __extendhfsf2@PLT
+; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT: callq __truncsfhf2@PLT
; SSE2-NEXT: popq %rax
; SSE2-NEXT: .cfi_def_cfa_offset 8
@@ -162,26 +164,32 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) {
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill
+; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload
+; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
-; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload
; AVX-NEXT: # xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: callq __truncsfhf2@PLT
; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; AVX-NEXT: callq __truncsfhf2@PLT
; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
; AVX-NEXT: callq __truncsfhf2@PLT
; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; AVX-NEXT: callq __truncsfhf2@PLT
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
@@ -191,26 +199,32 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) {
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %rax
; AVX2-NEXT: .cfi_def_cfa_offset 16
-; AVX2-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill
+; AVX2-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX2-NEXT: callq __extendhfsf2@PLT
-; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX2-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload
+; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT: callq __extendhfsf2@PLT
-; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
-; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX2-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload
; AVX2-NEXT: # xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX2-NEXT: callq __truncsfhf2@PLT
; AVX2-NEXT: callq __extendhfsf2@PLT
-; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX2-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; AVX2-NEXT: callq __truncsfhf2@PLT
; AVX2-NEXT: callq __extendhfsf2@PLT
-; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
; AVX2-NEXT: callq __truncsfhf2@PLT
; AVX2-NEXT: callq __extendhfsf2@PLT
-; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX2-NEXT: callq __truncsfhf2@PLT
+; AVX2-NEXT: callq __extendhfsf2@PLT
+; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; AVX2-NEXT: callq __truncsfhf2@PLT
; AVX2-NEXT: popq %rax
; AVX2-NEXT: .cfi_def_cfa_offset 8
@@ -232,6 +246,15 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) {
; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512F-NEXT: vsubss %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm2
+; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512F-NEXT: vmulss %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -255,6 +278,15 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) {
; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512BW-NEXT: vsubss %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm2
+; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2
+; AVX512BW-NEXT: vmulss %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -280,6 +312,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; SSE-NEXT: fld %st(0)
; SSE-NEXT: fadd %st(2), %st
; SSE-NEXT: fsubp %st, %st(1)
+; SSE-NEXT: fld1
+; SSE-NEXT: fmulp %st, %st(1)
; SSE-NEXT: fsubp %st, %st(1)
; SSE-NEXT: retq
;
@@ -291,6 +325,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; SSE2-NEXT: fld %st(0)
; SSE2-NEXT: fadd %st(2), %st
; SSE2-NEXT: fsubp %st, %st(1)
+; SSE2-NEXT: fld1
+; SSE2-NEXT: fmulp %st, %st(1)
; SSE2-NEXT: fsubp %st, %st(1)
; SSE2-NEXT: retq
;
@@ -302,6 +338,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; AVX-NEXT: fld %st(0)
; AVX-NEXT: fadd %st(2), %st
; AVX-NEXT: fsubp %st, %st(1)
+; AVX-NEXT: fld1
+; AVX-NEXT: fmulp %st, %st(1)
; AVX-NEXT: fsubp %st, %st(1)
; AVX-NEXT: retq
;
@@ -313,6 +351,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; AVX2-NEXT: fld %st(0)
; AVX2-NEXT: fadd %st(2), %st
; AVX2-NEXT: fsubp %st, %st(1)
+; AVX2-NEXT: fld1
+; AVX2-NEXT: fmulp %st, %st(1)
; AVX2-NEXT: fsubp %st, %st(1)
; AVX2-NEXT: retq
;
@@ -324,6 +364,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; AVX512F-NEXT: fld %st(0)
; AVX512F-NEXT: fadd %st(2), %st
; AVX512F-NEXT: fsubp %st, %st(1)
+; AVX512F-NEXT: fld1
+; AVX512F-NEXT: fmulp %st, %st(1)
; AVX512F-NEXT: fsubp %st, %st(1)
; AVX512F-NEXT: retq
;
@@ -335,6 +377,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; AVX512BW-NEXT: fld %st(0)
; AVX512BW-NEXT: fadd %st(2), %st
; AVX512BW-NEXT: fsubp %st, %st(1)
+; AVX512BW-NEXT: fld1
+; AVX512BW-NEXT: fmulp %st, %st(1)
; AVX512BW-NEXT: fsubp %st, %st(1)
; AVX512BW-NEXT: retq
entry:
@@ -347,214 +391,6 @@ entry:
ret x86_fp80 %result
}
-define bfloat @complex_canonicalize_fmul_bfloat(bfloat %a, bfloat %b) {
-; SSE-LABEL: complex_canonicalize_fmul_bfloat:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: .cfi_def_cfa_offset 16
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: pextrw $0, %xmm1, %ecx
-; SSE-NEXT: shll $16, %ecx
-; SSE-NEXT: movd %ecx, %xmm1
-; SSE-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill
-; SSE-NEXT: shll $16, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: callq __truncsfbf2@PLT
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: shll $16, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
-; SSE-NEXT: callq __truncsfbf2@PLT
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: shll $16, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE-NEXT: callq __truncsfbf2@PLT
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: shll $16, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
-; SSE-NEXT: callq __truncsfbf2@PLT
-; SSE-NEXT: popq %rax
-; SSE-NEXT: .cfi_def_cfa_offset 8
-; SSE-NEXT: retq
-;
-; SSE2-LABEL: complex_canonicalize_fmul_bfloat:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushq %rax
-; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: pextrw $0, %xmm1, %ecx
-; SSE2-NEXT: shll $16, %ecx
-; SSE2-NEXT: movd %ecx, %xmm1
-; SSE2-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill
-; SSE2-NEXT: shll $16, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: subss %xmm1, %xmm0
-; SSE2-NEXT: callq __truncsfbf2@PLT
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: shll $16, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfbf2@PLT
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: shll $16, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfbf2@PLT
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: shll $16, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfbf2@PLT
-; SSE2-NEXT: popq %rax
-; SSE2-NEXT: .cfi_def_cfa_offset 8
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: complex_canonicalize_fmul_bfloat:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: pushq %rax
-; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: vpextrw $0, %xmm1, %ecx
-; AVX-NEXT: shll $16, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm1
-; AVX-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
-; AVX-NEXT: shll $16, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: callq __truncsfbf2@PLT
-; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: shll $16, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfbf2@PLT
-; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: shll $16, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfbf2@PLT
-; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: shll $16, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfbf2@PLT
-; AVX-NEXT: popq %rax
-; AVX-NEXT: .cfi_def_cfa_offset 8
-; AVX-NEXT: retq
-;
-; AVX2-LABEL: complex_canonicalize_fmul_bfloat:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: pushq %rax
-; AVX2-NEXT: .cfi_def_cfa_offset 16
-; AVX2-NEXT: vpextrw $0, %xmm0, %eax
-; AVX2-NEXT: vpextrw $0, %xmm1, %ecx
-; AVX2-NEXT: shll $16, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
-; AVX2-NEXT: shll $16, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: callq __truncsfbf2@PLT
-; AVX2-NEXT: vpextrw $0, %xmm0, %eax
-; AVX2-NEXT: shll $16, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX2-NEXT: callq __truncsfbf2@PLT
-; AVX2-NEXT: vpextrw $0, %xmm0, %eax
-; AVX2-NEXT: shll $16, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX2-NEXT: callq __truncsfbf2@PLT
-; AVX2-NEXT: vpextrw $0, %xmm0, %eax
-; AVX2-NEXT: shll $16, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX2-NEXT: callq __truncsfbf2@PLT
-; AVX2-NEXT: popq %rax
-; AVX2-NEXT: .cfi_def_cfa_offset 8
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: complex_canonicalize_fmul_bfloat:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: pushq %rax
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512F-NEXT: vpextrw $0, %xmm1, %ecx
-; AVX512F-NEXT: shll $16, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: callq __truncsfbf2@PLT
-; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; AVX512F-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX512F-NEXT: callq __truncsfbf2@PLT
-; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX512F-NEXT: callq __truncsfbf2@PLT
-; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX512F-NEXT: callq __truncsfbf2@PLT
-; AVX512F-NEXT: popq %rax
-; AVX512F-NEXT: .cfi_def_cfa_offset 8
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: complex_canonicalize_fmul_bfloat:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: pushq %rax
-; AVX512BW-NEXT: .cfi_def_cfa_offset 16
-; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512BW-NEXT: vpextrw $0, %xmm1, %ecx
-; AVX512BW-NEXT: shll $16, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm1
-; AVX512BW-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill
-; AVX512BW-NEXT: shll $16, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm0
-; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: callq __truncsfbf2@PLT
-; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512BW-NEXT: shll $16, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm0
-; AVX512BW-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; AVX512BW-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX512BW-NEXT: callq __truncsfbf2@PLT
-; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512BW-NEXT: shll $16, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm0
-; AVX512BW-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX512BW-NEXT: callq __truncsfbf2@PLT
-; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512BW-NEXT: shll $16, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm0
-; AVX512BW-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX512BW-NEXT: callq __truncsfbf2@PLT
-; AVX512BW-NEXT: popq %rax
-; AVX512BW-NEXT: .cfi_def_cfa_offset 8
-; AVX512BW-NEXT: retq
-entry:
-
- %sub1 = fsub bfloat %a, %b
- %add = fadd bfloat %sub1, %b
- %sub2 = fsub bfloat %add, %sub1
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %sub2)
- %result = fsub bfloat %canonicalized, %b
- ret bfloat %result
-}
-
define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; SSE-LABEL: canonicalize_fp64:
; SSE: # %bb.0: # %start
@@ -565,6 +401,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; SSE-NEXT: maxsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
+; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
@@ -577,6 +414,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; SSE2-NEXT: maxsd %xmm0, %xmm1
; SSE2-NEXT: andnpd %xmm1, %xmm2
; SSE2-NEXT: orpd %xmm3, %xmm2
+; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
@@ -585,6 +423,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: canonicalize_fp64:
@@ -592,6 +431,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: canonicalize_fp64:
@@ -599,7 +439,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512F-NEXT: vmovapd %xmm2, %xmm0
+; AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: canonicalize_fp64:
@@ -607,7 +447,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; AVX512BW-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
; AVX512BW-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512BW-NEXT: vmovapd %xmm2, %xmm0
+; AVX512BW-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512BW-NEXT: retq
start:
@@ -629,6 +469,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
+; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
@@ -641,6 +482,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; SSE2-NEXT: maxss %xmm0, %xmm1
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm3, %xmm2
+; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
@@ -649,6 +491,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: canonicalize_fp32:
@@ -656,6 +499,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: canonicalize_fp32:
@@ -663,7 +507,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512F-NEXT: vmovaps %xmm2, %xmm0
+; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: canonicalize_fp32:
@@ -671,7 +515,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; AVX512BW-NEXT: vmaxss %xmm0, %xmm1, %xmm2
; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1
; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512BW-NEXT: vmovaps %xmm2, %xmm0
+; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512BW-NEXT: retq
start:
@@ -962,39 +806,48 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
ret void
}
-define void @v_test_canonicalize__bfloat(bfloat addrspace(1)* %out) {
-; SSE-LABEL: v_test_canonicalize__bfloat:
-; SSE: # %bb.0: # %entry
+define void @canonicalize_undef(double addrspace(1)* %out) {
+; SSE-LABEL: canonicalize_undef:
+; SSE: # %bb.0:
+; SSE-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
-; SSE2-LABEL: v_test_canonicalize__bfloat:
-; SSE2: # %bb.0: # %entry
+; SSE2-LABEL: canonicalize_undef:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; SSE2-NEXT: movq %rax, (%rdi)
; SSE2-NEXT: retq
;
-; AVX-LABEL: v_test_canonicalize__bfloat:
-; AVX: # %bb.0: # %entry
+; AVX-LABEL: canonicalize_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; AVX-NEXT: movq %rax, (%rdi)
; AVX-NEXT: retq
;
-; AVX2-LABEL: v_test_canonicalize__bfloat:
-; AVX2: # %bb.0: # %entry
+; AVX2-LABEL: canonicalize_undef:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; AVX2-NEXT: movq %rax, (%rdi)
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: v_test_canonicalize__bfloat:
-; AVX512F: # %bb.0: # %entry
+; AVX512F-LABEL: canonicalize_undef:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; AVX512F-NEXT: movq %rax, (%rdi)
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: v_test_canonicalize__bfloat:
-; AVX512BW: # %bb.0: # %entry
+; AVX512BW-LABEL: canonicalize_undef:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; AVX512BW-NEXT: movq %rax, (%rdi)
; AVX512BW-NEXT: retq
-entry:
- %val = load bfloat, bfloat addrspace(1)* %out
- %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val)
- store bfloat %canonicalized, bfloat addrspace(1)* %out
+ %canonicalized = call double @llvm.canonicalize.f64(double undef)
+ store double %canonicalized, double addrspace(1)* %out
ret void
}
declare double @llvm.canonicalize.f64(double)
declare float @llvm.canonicalize.f32(float)
-declare bfloat @llvm.canonicalize.bf16(bfloat)
declare x86_fp80 @llvm.canonicalize.f80(x86_fp80)
declare half @llvm.canonicalize.f16(half)
>From cbe7d0b91e59f5b05e9fd97a679b8304f10a42de Mon Sep 17 00:00:00 2001
From: Pawan Anil Nirpal <pawan.anil.nirpal at intel.com>
Date: Wed, 11 Sep 2024 11:22:36 +0200
Subject: [PATCH 5/5] fix run lines to reuse checks
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 21 +-
llvm/test/CodeGen/X86/canonicalize-vars.ll | 448 ++++-----------------
2 files changed, 78 insertions(+), 391 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de53fd4f20c786..91f03ccac779f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58150,25 +58150,8 @@ SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) {
SDLoc dl(Node);
// Canonicalize scalar variable FP Nodes.
- SDValue MulNode;
- SDValue One;
- if (VT == MVT::f32 || VT == MVT::f64) {
- One = DAG.getConstantFP(1.0f, dl, VT);
- } else if (VT == MVT::f80) {
- APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended());
- One = DAG.getConstantFP(Val, dl, VT);
- } else if (VT == MVT::f16) {
- APFloat Val(APFloat::IEEEhalf(), "1.0");
- One = DAG.getConstantFP(Val, dl, VT);
- } else if (VT == MVT::bf16) {
- APFloat Val(APFloat::BFloat(), "1.0");
- One = DAG.getConstantFP(Val, dl, VT);
- } else {
- // Is it better to assert? when we encounter an unknown FP type,Than to
- // just replace with the operand!
- return Operand;
- }
-
+ SDValue One =
+ DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
// TODO: Fix Crash for bf16 when generating strict_fmul as it
// leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll
index a9564496324703..d82749f8357368 100644
--- a/llvm/test/CodeGen/X86/canonicalize-vars.ll
+++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll
@@ -1,10 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
-; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE
-; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2
-; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX
-; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2
-; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F
-; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW
+; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2
+; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW
define float @canon_fp32_varargsf32(float %a) {
@@ -13,30 +12,10 @@ define float @canon_fp32_varargsf32(float %a) {
; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; SSE2-LABEL: canon_fp32_varargsf32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: retq
-;
; AVX-LABEL: canon_fp32_varargsf32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp32_varargsf32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp32_varargsf32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp32_varargsf32:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: retq
%canonicalized = call float @llvm.canonicalize.f32(float %a)
ret float %canonicalized
}
@@ -49,40 +28,12 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) {
; SSE-NEXT: fmulp %st, %st(1)
; SSE-NEXT: retq
;
-; SSE2-LABEL: canon_fp32_varargsf80:
-; SSE2: # %bb.0:
-; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
-; SSE2-NEXT: fld1
-; SSE2-NEXT: fmulp %st, %st(1)
-; SSE2-NEXT: retq
-;
; AVX-LABEL: canon_fp32_varargsf80:
; AVX: # %bb.0:
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX-NEXT: fld1
; AVX-NEXT: fmulp %st, %st(1)
; AVX-NEXT: retq
-;
-; AVX2-LABEL: canon_fp32_varargsf80:
-; AVX2: # %bb.0:
-; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX2-NEXT: fld1
-; AVX2-NEXT: fmulp %st, %st(1)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canon_fp32_varargsf80:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: fld1
-; AVX512F-NEXT: fmulp %st, %st(1)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canon_fp32_varargsf80:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: fld1
-; AVX512BW-NEXT: fmulp %st, %st(1)
-; AVX512BW-NEXT: retq
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a)
ret x86_fp80 %canonicalized
}
@@ -124,76 +75,40 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) {
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
-; SSE2-LABEL: complex_canonicalize_fmul_half:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushq %rax
-; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
-; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload
-; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: subss %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: callq __truncsfhf2@PLT
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
-; SSE2-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfhf2@PLT
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfhf2@PLT
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
-; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfhf2@PLT
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfhf2@PLT
-; SSE2-NEXT: popq %rax
-; SSE2-NEXT: .cfi_def_cfa_offset 8
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: complex_canonicalize_fmul_half:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: pushq %rax
-; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
-; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
-; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload
-; AVX-NEXT: # xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
-; AVX-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
-; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: popq %rax
-; AVX-NEXT: .cfi_def_cfa_offset 8
-; AVX-NEXT: retq
+; AVX1-LABEL: complex_canonicalize_fmul_half:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: pushq %rax
+; AVX1-NEXT: .cfi_def_cfa_offset 16
+; AVX1-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX1-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX1-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload
+; AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: callq __truncsfhf2@PLT
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX1-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX1-NEXT: callq __truncsfhf2@PLT
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX1-NEXT: callq __truncsfhf2@PLT
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
+; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX1-NEXT: callq __truncsfhf2@PLT
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX1-NEXT: callq __truncsfhf2@PLT
+; AVX1-NEXT: popq %rax
+; AVX1-NEXT: .cfi_def_cfa_offset 8
+; AVX1-NEXT: retq
;
; AVX2-LABEL: complex_canonicalize_fmul_half:
; AVX2: # %bb.0: # %entry
@@ -317,19 +232,6 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; SSE-NEXT: fsubp %st, %st(1)
; SSE-NEXT: retq
;
-; SSE2-LABEL: complex_canonicalize_fmul_x86_fp80:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
-; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
-; SSE2-NEXT: fsub %st(1), %st
-; SSE2-NEXT: fld %st(0)
-; SSE2-NEXT: fadd %st(2), %st
-; SSE2-NEXT: fsubp %st, %st(1)
-; SSE2-NEXT: fld1
-; SSE2-NEXT: fmulp %st, %st(1)
-; SSE2-NEXT: fsubp %st, %st(1)
-; SSE2-NEXT: retq
-;
; AVX-LABEL: complex_canonicalize_fmul_x86_fp80:
; AVX: # %bb.0: # %entry
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
@@ -342,45 +244,6 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
; AVX-NEXT: fmulp %st, %st(1)
; AVX-NEXT: fsubp %st, %st(1)
; AVX-NEXT: retq
-;
-; AVX2-LABEL: complex_canonicalize_fmul_x86_fp80:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX2-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX2-NEXT: fsub %st(1), %st
-; AVX2-NEXT: fld %st(0)
-; AVX2-NEXT: fadd %st(2), %st
-; AVX2-NEXT: fsubp %st, %st(1)
-; AVX2-NEXT: fld1
-; AVX2-NEXT: fmulp %st, %st(1)
-; AVX2-NEXT: fsubp %st, %st(1)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: complex_canonicalize_fmul_x86_fp80:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: fsub %st(1), %st
-; AVX512F-NEXT: fld %st(0)
-; AVX512F-NEXT: fadd %st(2), %st
-; AVX512F-NEXT: fsubp %st, %st(1)
-; AVX512F-NEXT: fld1
-; AVX512F-NEXT: fmulp %st, %st(1)
-; AVX512F-NEXT: fsubp %st, %st(1)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: complex_canonicalize_fmul_x86_fp80:
-; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: fsub %st(1), %st
-; AVX512BW-NEXT: fld %st(0)
-; AVX512BW-NEXT: fadd %st(2), %st
-; AVX512BW-NEXT: fsubp %st, %st(1)
-; AVX512BW-NEXT: fld1
-; AVX512BW-NEXT: fmulp %st, %st(1)
-; AVX512BW-NEXT: fsubp %st, %st(1)
-; AVX512BW-NEXT: retq
entry:
%mul1 = fsub x86_fp80 %a, %b
@@ -405,26 +268,13 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
-; SSE2-LABEL: canonicalize_fp64:
-; SSE2: # %bb.0: # %start
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
-; SSE2-NEXT: movapd %xmm2, %xmm3
-; SSE2-NEXT: andpd %xmm1, %xmm3
-; SSE2-NEXT: maxsd %xmm0, %xmm1
-; SSE2-NEXT: andnpd %xmm1, %xmm2
-; SSE2-NEXT: orpd %xmm3, %xmm2
-; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canonicalize_fp64:
-; AVX: # %bb.0: # %start
-; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
-; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: canonicalize_fp64:
+; AVX1: # %bb.0: # %start
+; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
;
; AVX2-LABEL: canonicalize_fp64:
; AVX2: # %bb.0: # %start
@@ -473,26 +323,13 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
-; SSE2-LABEL: canonicalize_fp32:
-; SSE2: # %bb.0: # %start
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: cmpunordss %xmm0, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm3
-; SSE2-NEXT: andps %xmm1, %xmm3
-; SSE2-NEXT: maxss %xmm0, %xmm1
-; SSE2-NEXT: andnps %xmm1, %xmm2
-; SSE2-NEXT: orps %xmm3, %xmm2
-; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: canonicalize_fp32:
-; AVX: # %bb.0: # %start
-; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
-; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: canonicalize_fp32:
+; AVX1: # %bb.0: # %start
+; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
;
; AVX2-LABEL: canonicalize_fp32:
; AVX2: # %bb.0: # %start
@@ -535,40 +372,12 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
; SSE-NEXT: movss %xmm0, (%rdi)
; SSE-NEXT: retq
;
-; SSE2-LABEL: v_test_canonicalize_var_f32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; SSE2-NEXT: mulss (%rdi), %xmm0
-; SSE2-NEXT: movss %xmm0, (%rdi)
-; SSE2-NEXT: retq
-;
; AVX-LABEL: v_test_canonicalize_var_f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vmovss %xmm0, (%rdi)
; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_var_f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX2-NEXT: vmulss (%rdi), %xmm0, %xmm0
-; AVX2-NEXT: vmovss %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_var_f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX512F-NEXT: vmulss (%rdi), %xmm0, %xmm0
-; AVX512F-NEXT: vmovss %xmm0, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_var_f32:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX512BW-NEXT: vmulss (%rdi), %xmm0, %xmm0
-; AVX512BW-NEXT: vmovss %xmm0, (%rdi)
-; AVX512BW-NEXT: retq
%val = load float, float addrspace(1)* %out
%canonicalized = call float @llvm.canonicalize.f32(float %val)
store float %canonicalized, float addrspace(1)* %out
@@ -584,14 +393,6 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
; SSE-NEXT: fstpt (%rdi)
; SSE-NEXT: retq
;
-; SSE2-LABEL: v_test_canonicalize_x86_fp80:
-; SSE2: # %bb.0:
-; SSE2-NEXT: fldt (%rdi)
-; SSE2-NEXT: fld1
-; SSE2-NEXT: fmulp %st, %st(1)
-; SSE2-NEXT: fstpt (%rdi)
-; SSE2-NEXT: retq
-;
; AVX-LABEL: v_test_canonicalize_x86_fp80:
; AVX: # %bb.0:
; AVX-NEXT: fldt (%rdi)
@@ -599,30 +400,6 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
; AVX-NEXT: fmulp %st, %st(1)
; AVX-NEXT: fstpt (%rdi)
; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_x86_fp80:
-; AVX2: # %bb.0:
-; AVX2-NEXT: fldt (%rdi)
-; AVX2-NEXT: fld1
-; AVX2-NEXT: fmulp %st, %st(1)
-; AVX2-NEXT: fstpt (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_x86_fp80:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: fldt (%rdi)
-; AVX512F-NEXT: fld1
-; AVX512F-NEXT: fmulp %st, %st(1)
-; AVX512F-NEXT: fstpt (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_x86_fp80:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: fldt (%rdi)
-; AVX512BW-NEXT: fld1
-; AVX512BW-NEXT: fmulp %st, %st(1)
-; AVX512BW-NEXT: fstpt (%rdi)
-; AVX512BW-NEXT: retq
%val = load x86_fp80, x86_fp80 addrspace(1)* %out
%canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val)
store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
@@ -653,50 +430,27 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) {
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
-; SSE2-LABEL: v_test_canonicalize__half:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: subq $16, %rsp
-; SSE2-NEXT: .cfi_def_cfa_offset 32
-; SSE2-NEXT: .cfi_offset %rbx, -16
-; SSE2-NEXT: movq %rdi, %rbx
-; SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: callq __extendhfsf2@PLT
-; SSE2-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; SSE2-NEXT: callq __truncsfhf2@PLT
-; SSE2-NEXT: pextrw $0, %xmm0, %eax
-; SSE2-NEXT: movw %ax, (%rbx)
-; SSE2-NEXT: addq $16, %rsp
-; SSE2-NEXT: .cfi_def_cfa_offset 16
-; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: .cfi_def_cfa_offset 8
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: v_test_canonicalize__half:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: pushq %rbx
-; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: subq $16, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 32
-; AVX-NEXT: .cfi_offset %rbx, -16
-; AVX-NEXT: movq %rdi, %rbx
-; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
-; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
-; AVX-NEXT: addq $16, %rsp
-; AVX-NEXT: .cfi_def_cfa_offset 16
-; AVX-NEXT: popq %rbx
-; AVX-NEXT: .cfi_def_cfa_offset 8
-; AVX-NEXT: retq
+; AVX1-LABEL: v_test_canonicalize__half:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: pushq %rbx
+; AVX1-NEXT: .cfi_def_cfa_offset 16
+; AVX1-NEXT: subq $16, %rsp
+; AVX1-NEXT: .cfi_def_cfa_offset 32
+; AVX1-NEXT: .cfi_offset %rbx, -16
+; AVX1-NEXT: movq %rdi, %rbx
+; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: callq __extendhfsf2@PLT
+; AVX1-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; AVX1-NEXT: callq __truncsfhf2@PLT
+; AVX1-NEXT: vpextrw $0, %xmm0, (%rbx)
+; AVX1-NEXT: addq $16, %rsp
+; AVX1-NEXT: .cfi_def_cfa_offset 16
+; AVX1-NEXT: popq %rbx
+; AVX1-NEXT: .cfi_def_cfa_offset 8
+; AVX1-NEXT: retq
;
; AVX2-LABEL: v_test_canonicalize__half:
; AVX2: # %bb.0: # %entry
@@ -766,40 +520,12 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
; SSE-NEXT: movsd %xmm0, (%rdi)
; SSE-NEXT: retq
;
-; SSE2-LABEL: v_test_canonicalize_var_f64:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; SSE2-NEXT: mulsd (%rdi), %xmm0
-; SSE2-NEXT: movsd %xmm0, (%rdi)
-; SSE2-NEXT: retq
-;
; AVX-LABEL: v_test_canonicalize_var_f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: vmovsd %xmm0, (%rdi)
; AVX-NEXT: retq
-;
-; AVX2-LABEL: v_test_canonicalize_var_f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; AVX2-NEXT: vmulsd (%rdi), %xmm0, %xmm0
-; AVX2-NEXT: vmovsd %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: v_test_canonicalize_var_f64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; AVX512F-NEXT: vmulsd (%rdi), %xmm0, %xmm0
-; AVX512F-NEXT: vmovsd %xmm0, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: v_test_canonicalize_var_f64:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; AVX512BW-NEXT: vmulsd (%rdi), %xmm0, %xmm0
-; AVX512BW-NEXT: vmovsd %xmm0, (%rdi)
-; AVX512BW-NEXT: retq
%val = load double, double addrspace(1)* %out
%canonicalized = call double @llvm.canonicalize.f64(double %val)
store double %canonicalized, double addrspace(1)* %out
@@ -813,35 +539,11 @@ define void @canonicalize_undef(double addrspace(1)* %out) {
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
-; SSE2-LABEL: canonicalize_undef:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; SSE2-NEXT: movq %rax, (%rdi)
-; SSE2-NEXT: retq
-;
; AVX-LABEL: canonicalize_undef:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
; AVX-NEXT: movq %rax, (%rdi)
; AVX-NEXT: retq
-;
-; AVX2-LABEL: canonicalize_undef:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; AVX2-NEXT: movq %rax, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: canonicalize_undef:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; AVX512F-NEXT: movq %rax, (%rdi)
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: canonicalize_undef:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
-; AVX512BW-NEXT: movq %rax, (%rdi)
-; AVX512BW-NEXT: retq
%canonicalized = call double @llvm.canonicalize.f64(double undef)
store double %canonicalized, double addrspace(1)* %out
ret void
@@ -851,3 +553,5 @@ declare double @llvm.canonicalize.f64(double)
declare float @llvm.canonicalize.f32(float)
declare x86_fp80 @llvm.canonicalize.f80(x86_fp80)
declare half @llvm.canonicalize.f16(half)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE2: {{.*}}
More information about the llvm-commits
mailing list