[llvm] [X86][GlobalISel] Support G_FADD, G_FSUB, G_FMUL, G_FDIV (PR #87339)

Tue Apr 2 06:18:09 PDT 2024

https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/87339

>From 77c94380dc087ba83f91ab8950e5f4d7cde8c4d4 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 2 Apr 2024 02:57:07 -0700
Subject: [PATCH 1/2] [X86][GlobalISel] Support G_FADD, G_FSUB, G_FMUL, G_FDIV

* Add vector legalization
* Add vector tests and reorganize existing tests
---
 .../lib/Target/X86/GISel/X86LegalizerInfo.cpp |   7 +-
 .../CodeGen/X86/GlobalISel/fadd-scalar.ll     |  21 ---
 .../CodeGen/X86/GlobalISel/fdiv-scalar.ll     |  21 ---
 .../CodeGen/X86/GlobalISel/fmul-scalar.ll     |  21 ---
 .../CodeGen/X86/GlobalISel/fsub-scalar.ll     |  21 ---
 llvm/test/CodeGen/X86/isel-fadd-vector.ll     | 134 ++++++++++++++++++
 llvm/test/CodeGen/X86/isel-fadd.ll            |  52 +++++++
 llvm/test/CodeGen/X86/isel-fdiv-vector.ll     | 134 ++++++++++++++++++
 llvm/test/CodeGen/X86/isel-fdiv.ll            |  52 +++++++
 llvm/test/CodeGen/X86/isel-fmul-vector.ll     | 134 ++++++++++++++++++
 llvm/test/CodeGen/X86/isel-fmul.ll            |  52 +++++++
 llvm/test/CodeGen/X86/isel-fsub-vector.ll     | 134 ++++++++++++++++++
 llvm/test/CodeGen/X86/isel-fsub.ll            |  52 +++++++
 13 files changed, 750 insertions(+), 85 deletions(-)
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/fadd-scalar.ll
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/fmul-scalar.ll
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/fsub-scalar.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fadd-vector.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fadd.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fdiv-vector.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fdiv.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fmul-vector.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fmul.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fsub-vector.ll
 create mode 100644 llvm/test/CodeGen/X86/isel-fsub.ll

diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 07041cc5b0491c..3be3121bab66f8 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -429,7 +429,12 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                (HasAVX && typeInSet(0, {v8s32, v4s64})(Query)) ||
                (HasAVX512 && typeInSet(0, {v16s32, v8s64})(Query)) ||
                (UseX87 && typeInSet(0, {s80})(Query));
-      });
+      })
+      .clampMinNumElements(0, s32, 4)
+      .clampMinNumElements(0, s64, 2)
+      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
+      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
+      .scalarize(0);
 
   // fp comparison
   getActionDefinitionsBuilder(G_FCMP)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fadd-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fadd-scalar.ll
deleted file mode 100644
index 73be29e5d8438b..00000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fadd-scalar.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
-
-define float @test_fadd_float(float %arg1, float %arg2) {
-; X64-LABEL: test_fadd_float:
-; X64:       # %bb.0:
-; X64-NEXT:    addss %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fadd float %arg1, %arg2
-  ret float %ret
-}
-
-define double @test_fadd_double(double %arg1, double %arg2) {
-; X64-LABEL: test_fadd_double:
-; X64:       # %bb.0:
-; X64-NEXT:    addsd %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fadd double %arg1, %arg2
-  ret double %ret
-}
-
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll
deleted file mode 100644
index f2dc6de08528ed..00000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fdiv-scalar.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
-
-define float @test_fdiv_float(float %arg1, float %arg2) {
-; X64-LABEL: test_fdiv_float:
-; X64:       # %bb.0:
-; X64-NEXT:    divss %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fdiv float %arg1, %arg2
-  ret float %ret
-}
-
-define double @test_fdiv_double(double %arg1, double %arg2) {
-; X64-LABEL: test_fdiv_double:
-; X64:       # %bb.0:
-; X64-NEXT:    divsd %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fdiv double %arg1, %arg2
-  ret double %ret
-}
-
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fmul-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fmul-scalar.ll
deleted file mode 100644
index 187593d082c543..00000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fmul-scalar.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
-
-define float @test_fmul_float(float %arg1, float %arg2) {
-; X64-LABEL: test_fmul_float:
-; X64:       # %bb.0:
-; X64-NEXT:    mulss %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fmul float %arg1, %arg2
-  ret float %ret
-}
-
-define double @test_fmul_double(double %arg1, double %arg2) {
-; X64-LABEL: test_fmul_double:
-; X64:       # %bb.0:
-; X64-NEXT:    mulsd %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fmul double %arg1, %arg2
-  ret double %ret
-}
-
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fsub-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fsub-scalar.ll
deleted file mode 100644
index b2a82834af5bbe..00000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fsub-scalar.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
-
-define float @test_fsub_float(float %arg1, float %arg2) {
-; X64-LABEL: test_fsub_float:
-; X64:       # %bb.0:
-; X64-NEXT:    subss %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fsub float %arg1, %arg2
-  ret float %ret
-}
-
-define double @test_fsub_double(double %arg1, double %arg2) {
-; X64-LABEL: test_fsub_double:
-; X64:       # %bb.0:
-; X64-NEXT:    subsd %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ret = fsub double %arg1, %arg2
-  ret double %ret
-}
-
diff --git a/llvm/test/CodeGen/X86/isel-fadd-vector.ll b/llvm/test/CodeGen/X86/isel-fadd-vector.ll
new file mode 100644
index 00000000000000..b44818d49344db
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fadd-vector.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+
+define <4 x float> @test_fadd_v4s32(<4 x float> %arg1, <4 x float> %arg2) {
+; SSE-LABEL: test_fadd_v4s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fadd_v4s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fadd_v4s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fadd <4 x float> %arg1, %arg2
+  ret <4 x float> %ret
+}
+
+define <2 x double> @test_fadd_v2s64(<2 x double> %arg1, <2 x double> %arg2) {
+; SSE-LABEL: test_fadd_v2s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fadd_v2s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fadd_v2s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fadd <2 x double> %arg1, %arg2
+  ret <2 x double> %ret
+}
+
+define <8 x float> @test_fadd_v8s32(<8 x float> %arg1, <8 x float> %arg2) {
+; SSE-LABEL: test_fadd_v8s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm2, %xmm0
+; SSE-NEXT:    addps %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fadd_v8s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fadd_v8s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fadd <8 x float> %arg1, %arg2
+  ret <8 x float> %ret
+}
+
+define <4 x double> @test_fadd_v4s64(<4 x double> %arg1, <4 x double> %arg2) {
+; SSE-LABEL: test_fadd_v4s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm2, %xmm0
+; SSE-NEXT:    addpd %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fadd_v4s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fadd_v4s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fadd <4 x double> %arg1, %arg2
+  ret <4 x double> %ret
+}
+
+define <16 x float> @test_fadd_v16s32(<16 x float> %arg1, <16 x float> %arg2) {
+; SSE-LABEL: test_fadd_v16s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm4, %xmm0
+; SSE-NEXT:    addps %xmm5, %xmm1
+; SSE-NEXT:    addps %xmm6, %xmm2
+; SSE-NEXT:    addps %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fadd_v16s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vaddps %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vaddps %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fadd_v16s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fadd <16 x float> %arg1, %arg2
+  ret <16 x float> %ret
+}
+
+define <8 x double> @test_fadd_v8s64(<8 x double> %arg1, <8 x double> %arg2) {
+; SSE-LABEL: test_fadd_v8s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm4, %xmm0
+; SSE-NEXT:    addpd %xmm5, %xmm1
+; SSE-NEXT:    addpd %xmm6, %xmm2
+; SSE-NEXT:    addpd %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fadd_v8s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fadd_v8s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fadd <8 x double> %arg1, %arg2
+  ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fadd.ll b/llvm/test/CodeGen/X86/isel-fadd.ll
new file mode 100644
index 00000000000000..16e005828608c1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fadd.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=SDAG-X86
+; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=FASTISEL-X86
+; TODO: Enable the i686 -global-isel run below when x87 is supported
+; llc < %s -mtriple=i686-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=X64
+
+define float @test_fadd_float(float %arg1, float %arg2) {
+; SDAG-X86-LABEL: test_fadd_float:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fadds {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fadd_float:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    faddp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fadd_float:
+; X64:       # %bb.0:
+; X64-NEXT:    addss %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fadd float %arg1, %arg2
+  ret float %ret
+}
+
+define double @test_fadd_double(double %arg1, double %arg2) {
+; SDAG-X86-LABEL: test_fadd_double:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    faddl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fadd_double:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    faddp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fadd_double:
+; X64:       # %bb.0:
+; X64-NEXT:    addsd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fadd double %arg1, %arg2
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fdiv-vector.ll b/llvm/test/CodeGen/X86/isel-fdiv-vector.ll
new file mode 100644
index 00000000000000..ff114c2af7e96b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fdiv-vector.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+
+define <4 x float> @test_fdiv_v4s32(<4 x float> %arg1, <4 x float> %arg2) {
+; SSE-LABEL: test_fdiv_v4s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    divps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fdiv_v4s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vdivps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fdiv_v4s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vdivps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fdiv <4 x float> %arg1, %arg2
+  ret <4 x float> %ret
+}
+
+define <2 x double> @test_fdiv_v2s64(<2 x double> %arg1, <2 x double> %arg2) {
+; SSE-LABEL: test_fdiv_v2s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    divpd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fdiv_v2s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vdivpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fdiv_v2s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vdivpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fdiv <2 x double> %arg1, %arg2
+  ret <2 x double> %ret
+}
+
+define <8 x float> @test_fdiv_v8s32(<8 x float> %arg1, <8 x float> %arg2) {
+; SSE-LABEL: test_fdiv_v8s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    divps %xmm2, %xmm0
+; SSE-NEXT:    divps %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fdiv_v8s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vdivps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fdiv_v8s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vdivps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fdiv <8 x float> %arg1, %arg2
+  ret <8 x float> %ret
+}
+
+define <4 x double> @test_fdiv_v4s64(<4 x double> %arg1, <4 x double> %arg2) {
+; SSE-LABEL: test_fdiv_v4s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    divpd %xmm2, %xmm0
+; SSE-NEXT:    divpd %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fdiv_v4s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vdivpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fdiv_v4s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vdivpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fdiv <4 x double> %arg1, %arg2
+  ret <4 x double> %ret
+}
+
+define <16 x float> @test_fdiv_v16s32(<16 x float> %arg1, <16 x float> %arg2) {
+; SSE-LABEL: test_fdiv_v16s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    divps %xmm4, %xmm0
+; SSE-NEXT:    divps %xmm5, %xmm1
+; SSE-NEXT:    divps %xmm6, %xmm2
+; SSE-NEXT:    divps %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fdiv_v16s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vdivps %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vdivps %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fdiv_v16s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vdivps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fdiv <16 x float> %arg1, %arg2
+  ret <16 x float> %ret
+}
+
+define <8 x double> @test_fdiv_v8s64(<8 x double> %arg1, <8 x double> %arg2) {
+; SSE-LABEL: test_fdiv_v8s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    divpd %xmm4, %xmm0
+; SSE-NEXT:    divpd %xmm5, %xmm1
+; SSE-NEXT:    divpd %xmm6, %xmm2
+; SSE-NEXT:    divpd %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fdiv_v8s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vdivpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vdivpd %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fdiv_v8s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vdivpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fdiv <8 x double> %arg1, %arg2
+  ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fdiv.ll b/llvm/test/CodeGen/X86/isel-fdiv.ll
new file mode 100644
index 00000000000000..74ad27a8414d2c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fdiv.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=SDAG-X86
+; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=FASTISEL-X86
+; TODO: Enable the i686 -global-isel run below when x87 is supported
+; llc < %s -mtriple=i686-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=X64
+
+define float @test_fdiv_float(float %arg1, float %arg2) {
+; SDAG-X86-LABEL: test_fdiv_float:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fdivs {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fdiv_float:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fdivp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fdiv_float:
+; X64:       # %bb.0:
+; X64-NEXT:    divss %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fdiv float %arg1, %arg2
+  ret float %ret
+}
+
+define double @test_fdiv_double(double %arg1, double %arg2) {
+; SDAG-X86-LABEL: test_fdiv_double:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fdivl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fdiv_double:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fdivp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fdiv_double:
+; X64:       # %bb.0:
+; X64-NEXT:    divsd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fdiv double %arg1, %arg2
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fmul-vector.ll b/llvm/test/CodeGen/X86/isel-fmul-vector.ll
new file mode 100644
index 00000000000000..7734f00f351cfb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fmul-vector.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+
+define <4 x float> @test_fmul_v4s32(<4 x float> %arg1, <4 x float> %arg2) {
+; SSE-LABEL: test_fmul_v4s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fmul_v4s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fmul_v4s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fmul <4 x float> %arg1, %arg2
+  ret <4 x float> %ret
+}
+
+define <2 x double> @test_fmul_v2s64(<2 x double> %arg1, <2 x double> %arg2) {
+; SSE-LABEL: test_fmul_v2s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulpd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fmul_v2s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fmul_v2s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fmul <2 x double> %arg1, %arg2
+  ret <2 x double> %ret
+}
+
+define <8 x float> @test_fmul_v8s32(<8 x float> %arg1, <8 x float> %arg2) {
+; SSE-LABEL: test_fmul_v8s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulps %xmm2, %xmm0
+; SSE-NEXT:    mulps %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fmul_v8s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fmul_v8s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fmul <8 x float> %arg1, %arg2
+  ret <8 x float> %ret
+}
+
+define <4 x double> @test_fmul_v4s64(<4 x double> %arg1, <4 x double> %arg2) {
+; SSE-LABEL: test_fmul_v4s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulpd %xmm2, %xmm0
+; SSE-NEXT:    mulpd %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fmul_v4s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fmul_v4s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fmul <4 x double> %arg1, %arg2
+  ret <4 x double> %ret
+}
+
+define <16 x float> @test_fmul_v16s32(<16 x float> %arg1, <16 x float> %arg2) {
+; SSE-LABEL: test_fmul_v16s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulps %xmm4, %xmm0
+; SSE-NEXT:    mulps %xmm5, %xmm1
+; SSE-NEXT:    mulps %xmm6, %xmm2
+; SSE-NEXT:    mulps %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fmul_v16s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulps %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vmulps %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fmul_v16s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fmul <16 x float> %arg1, %arg2
+  ret <16 x float> %ret
+}
+
+define <8 x double> @test_fmul_v8s64(<8 x double> %arg1, <8 x double> %arg2) {
+; SSE-LABEL: test_fmul_v8s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulpd %xmm4, %xmm0
+; SSE-NEXT:    mulpd %xmm5, %xmm1
+; SSE-NEXT:    mulpd %xmm6, %xmm2
+; SSE-NEXT:    mulpd %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fmul_v8s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fmul_v8s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fmul <8 x double> %arg1, %arg2
+  ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fmul.ll b/llvm/test/CodeGen/X86/isel-fmul.ll
new file mode 100644
index 00000000000000..af509a2967c2be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fmul.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=SDAG-X86
+; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=FASTISEL-X86
+; TODO: Enable the i686 -global-isel run below when x87 is supported
+; llc < %s -mtriple=i686-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=X64
+
+define float @test_fmul_float(float %arg1, float %arg2) {
+; SDAG-X86-LABEL: test_fmul_float:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fmuls {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fmul_float:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fmulp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fmul_float:
+; X64:       # %bb.0:
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fmul float %arg1, %arg2
+  ret float %ret
+}
+
+define double @test_fmul_double(double %arg1, double %arg2) {
+; SDAG-X86-LABEL: test_fmul_double:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fmull {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fmul_double:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fmulp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fmul_double:
+; X64:       # %bb.0:
+; X64-NEXT:    mulsd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fmul double %arg1, %arg2
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fsub-vector.ll b/llvm/test/CodeGen/X86/isel-fsub-vector.ll
new file mode 100644
index 00000000000000..9e7a14bcdae561
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fsub-vector.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=AVX512
+
+define <4 x float> @test_fsub_v4s32(<4 x float> %arg1, <4 x float> %arg2) {
+; SSE-LABEL: test_fsub_v4s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fsub_v4s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fsub_v4s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vsubps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fsub <4 x float> %arg1, %arg2
+  ret <4 x float> %ret
+}
+
+define <2 x double> @test_fsub_v2s64(<2 x double> %arg1, <2 x double> %arg2) {
+; SSE-LABEL: test_fsub_v2s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subpd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fsub_v2s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fsub_v2s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %ret = fsub <2 x double> %arg1, %arg2
+  ret <2 x double> %ret
+}
+
+define <8 x float> @test_fsub_v8s32(<8 x float> %arg1, <8 x float> %arg2) {
+; SSE-LABEL: test_fsub_v8s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subps %xmm2, %xmm0
+; SSE-NEXT:    subps %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fsub_v8s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsubps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fsub_v8s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vsubps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fsub <8 x float> %arg1, %arg2
+  ret <8 x float> %ret
+}
+
+define <4 x double> @test_fsub_v4s64(<4 x double> %arg1, <4 x double> %arg2) {
+; SSE-LABEL: test_fsub_v4s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subpd %xmm2, %xmm0
+; SSE-NEXT:    subpd %xmm3, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fsub_v4s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fsub_v4s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %ret = fsub <4 x double> %arg1, %arg2
+  ret <4 x double> %ret
+}
+
+define <16 x float> @test_fsub_v16s32(<16 x float> %arg1, <16 x float> %arg2) {
+; SSE-LABEL: test_fsub_v16s32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subps %xmm4, %xmm0
+; SSE-NEXT:    subps %xmm5, %xmm1
+; SSE-NEXT:    subps %xmm6, %xmm2
+; SSE-NEXT:    subps %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fsub_v16s32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsubps %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vsubps %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fsub_v16s32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fsub <16 x float> %arg1, %arg2
+  ret <16 x float> %ret
+}
+
+define <8 x double> @test_fsub_v8s64(<8 x double> %arg1, <8 x double> %arg2) {
+; SSE-LABEL: test_fsub_v8s64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subpd %xmm4, %xmm0
+; SSE-NEXT:    subpd %xmm5, %xmm1
+; SSE-NEXT:    subpd %xmm6, %xmm2
+; SSE-NEXT:    subpd %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_fsub_v8s64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsubpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vsubpd %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_fsub_v8s64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %ret = fsub <8 x double> %arg1, %arg2
+  ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/X86/isel-fsub.ll b/llvm/test/CodeGen/X86/isel-fsub.ll
new file mode 100644
index 00000000000000..943e633b0d9eb4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fsub.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=SDAG-X86
+; RUN: llc < %s -mtriple=i686-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=FASTISEL-X86
+; TODO: Enable the i686 -global-isel run below when x87 is supported
+; llc < %s -mtriple=i686-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-- -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=X64
+
+define float @test_fsub_float(float %arg1, float %arg2) {
+; SDAG-X86-LABEL: test_fsub_float:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fsubs {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fsub_float:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fsubp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fsub_float:
+; X64:       # %bb.0:
+; X64-NEXT:    subss %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fsub float %arg1, %arg2
+  ret float %ret
+}
+
+define double @test_fsub_double(double %arg1, double %arg2) {
+; SDAG-X86-LABEL: test_fsub_double:
+; SDAG-X86:       # %bb.0:
+; SDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    fsubl {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    retl
+;
+; FASTISEL-X86-LABEL: test_fsub_double:
+; FASTISEL-X86:       # %bb.0:
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTISEL-X86-NEXT:    fsubp %st, %st(1)
+; FASTISEL-X86-NEXT:    retl
+;
+; X64-LABEL: test_fsub_double:
+; X64:       # %bb.0:
+; X64-NEXT:    subsd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %ret = fsub double %arg1, %arg2
+  ret double %ret
+}

>From c9c6660d9f83dcb4848052d236f85ea634be0a47 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
Date: Tue, 2 Apr 2024 06:14:27 -0700
Subject: [PATCH 2/2] Use clampNumElements instead of clampMin/Max

---
 llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 3be3121bab66f8..631c6903743b39 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -430,10 +430,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                (HasAVX512 && typeInSet(0, {v16s32, v8s64})(Query)) ||
                (UseX87 && typeInSet(0, {s80})(Query));
       })
-      .clampMinNumElements(0, s32, 4)
-      .clampMinNumElements(0, s64, 2)
-      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
-      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
+      .clampNumElements(0, v4s32, HasAVX512 ? v16s32 : (HasAVX ? v8s32 : v4s32))
+      .clampNumElements(0, v2s64, HasAVX512 ? v8s64 : (HasAVX ? v4s64 : v2s64))
       .scalarize(0);
 
   // fp comparison