[llvm] 58a2f83 - [AArch64][GISel] Expand coverage of FDiv and move into place.

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 30 14:09:59 PDT 2023


Author: David Green
Date: 2023-08-30T22:09:53+01:00
New Revision: 58a2f839fdd9fecf90ff09eb2437161e55a89e05

URL: https://github.com/llvm/llvm-project/commit/58a2f839fdd9fecf90ff09eb2437161e55a89e05
DIFF: https://github.com/llvm/llvm-project/commit/58a2f839fdd9fecf90ff09eb2437161e55a89e05.diff

LOG: [AArch64][GISel] Expand coverage of FDiv and move into place.

This adds more extensive test coverage for fdiv through GlobalISel. G_FDIV now
uses the more complete action definitions shared with the other floating-point
opcodes (G_FADD, G_FSUB, G_FMUL, ...), which handle more cases. That shared
definition is moved into the position of the old G_FDIV-only rule, which is no
longer needed.

Added: 
    llvm/test/CodeGen/AArch64/fdiv.ll

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cd08bf2c1a726b..88e7115555f052 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4833,6 +4833,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FDIV:
   case TargetOpcode::G_UADDSAT:
   case TargetOpcode::G_USUBSAT:
   case TargetOpcode::G_SADDSAT:

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 00424f23e96d7d..e8a2827e3a7952 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -235,11 +235,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
        .clampScalar(1, s32, s64)
       .widenScalarToNextPow2(0);
 
-  getActionDefinitionsBuilder(G_FDIV)
-      .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
-      .clampScalar(0, MinFPScalar, s64)
+  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG, G_FABS,
+                               G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
+                               G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
+                               G_FNEARBYINT, G_INTRINSIC_TRUNC,
+                               G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
+      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
+      .legalIf([=](const LegalityQuery &Query) {
+        const auto &Ty = Query.Types[0];
+        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+      })
+      .libcallFor({s128})
+      .minScalarOrElt(0, MinFPScalar)
+      .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
-      .clampNumElements(0, v2s64, v2s64);
+      .clampNumElements(0, v2s64, v2s64)
+      .moreElementsToNextPow2(0);
 
   getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
 
@@ -948,22 +959,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   // TODO: Vector types.
   getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
 
-  getActionDefinitionsBuilder(
-      {G_FADD, G_FSUB, G_FMUL, G_FNEG, G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
-       G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
-       G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
-      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
-      .legalIf([=](const LegalityQuery &Query) {
-        const auto &Ty = Query.Types[0];
-        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
-      })
-      .libcallFor({s128})
-      .minScalarOrElt(0, MinFPScalar)
-      .clampNumElements(0, v4s16, v8s16)
-      .clampNumElements(0, v2s32, v4s32)
-      .clampNumElements(0, v2s64, v2s64)
-      .moreElementsToNextPow2(0);
-
   // TODO: Libcall support for s128.
   // TODO: s16 should be legal with full FP16 support.
   getActionDefinitionsBuilder({G_LROUND, G_LLROUND})

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index d4da0e317769c5..b3d912cc280da1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -448,8 +448,9 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FDIV (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 1, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK

diff  --git a/llvm/test/CodeGen/AArch64/fdiv.ll b/llvm/test/CodeGen/AArch64/fdiv.ll
new file mode 100644
index 00000000000000..80089288a03651
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fdiv.ll
@@ -0,0 +1,540 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK-LABEL: fdiv_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fdiv d0, d0, d1
+; CHECK-NEXT:    ret
+entry:
+  %c = fdiv double %a, %b
+  ret double %c
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK-LABEL: fdiv_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fdiv s0, s0, s1
+; CHECK-NEXT:    ret
+entry:
+  %c = fdiv float %a, %b
+  ret float %c
+}
+
+define half @fdiv_f16(half %a, half %b) {
+; CHECK-SD-NOFP16-LABEL: fdiv_f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s0, s0, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fdiv_f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fdiv_f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fdiv_f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = fdiv half %a, %b
+  ret half %c
+}
+
+define <2 x double> @fdiv_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: fdiv_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fdiv v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %c = fdiv <2 x double> %a, %b
+  ret <2 x double> %c
+}
+
+define <3 x double> @fdiv_v3f64(<3 x double> %a, <3 x double> %b) {
+; CHECK-SD-LABEL: fdiv_v3f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    fdiv v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    fdiv v0.2d, v0.2d, v3.2d
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fdiv_v3f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fdiv d2, d2, d5
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    fdiv v0.2d, v0.2d, v3.2d
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = fdiv <3 x double> %a, %b
+  ret <3 x double> %c
+}
+
+define <4 x double> @fdiv_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-SD-LABEL: fdiv_v4f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fdiv v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT:    fdiv v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fdiv_v4f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fdiv v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT:    fdiv v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = fdiv <4 x double> %a, %b
+  ret <4 x double> %c
+}
+
+define <2 x float> @fdiv_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: fdiv_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fdiv v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+entry:
+  %c = fdiv <2 x float> %a, %b
+  ret <2 x float> %c
+}
+
+define <3 x float> @fdiv_v3f32(<3 x float> %a, <3 x float> %b) {
+; CHECK-LABEL: fdiv_v3f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = fdiv <3 x float> %a, %b
+  ret <3 x float> %c
+}
+
+define <4 x float> @fdiv_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: fdiv_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %c = fdiv <4 x float> %a, %b
+  ret <4 x float> %c
+}
+
+define <8 x float> @fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-SD-LABEL: fdiv_v8f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fdiv v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT:    fdiv v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fdiv_v8f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fdiv v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    fdiv v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = fdiv <8 x float> %a, %b
+  ret <8 x float> %c
+}
+
+define <7 x half> @fdiv_v7f16(<7 x half> %a, <7 x half> %b) {
+; CHECK-SD-NOFP16-LABEL: fdiv_v7f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov h16, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    mov h17, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s2, s3, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h1
+; CHECK-SD-NOFP16-NEXT:    fdiv s3, s4, s3
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt h18, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    fdiv s4, s5, s4
+; CHECK-SD-NOFP16-NEXT:    mov h5, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v18.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s5, s6, s5
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s6, s7, s6
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s7, s16, s7
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fdiv s3, s17, s16
+; CHECK-SD-NOFP16-NEXT:    fdiv s0, s0, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s6
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fdiv_v7f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    fdiv v0.8h, v0.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fdiv_v7f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[1], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fdiv v3.4s, v6.4s, v7.4s
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[2], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v2.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v4.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fdiv v1.4s, v1.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v4.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fdiv_v7f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fdiv v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = fdiv <7 x half> %a, %b
+  ret <7 x half> %c
+}
+
+define <4 x half> @fdiv_v4f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-SD-NOFP16-LABEL: fdiv_v4f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-SD-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-SD-NOFP16-NEXT:    fdiv v0.4s, v0.4s, v1.4s
+; CHECK-SD-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fdiv_v4f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    fdiv v0.4h, v0.4h, v1.4h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fdiv_v4f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fdiv v0.4s, v0.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fdiv_v4f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fdiv v0.4h, v0.4h, v1.4h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = fdiv <4 x half> %a, %b
+  ret <4 x half> %c
+}
+
+define <8 x half> @fdiv_v8f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-SD-NOFP16-LABEL: fdiv_v8f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h6, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    mov h7, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov h16, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    mov h17, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s2, s3, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h1
+; CHECK-SD-NOFP16-NEXT:    fdiv s3, s4, s3
+; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt h18, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    fdiv s4, s5, s4
+; CHECK-SD-NOFP16-NEXT:    mov h5, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v18.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s5, s6, s5
+; CHECK-SD-NOFP16-NEXT:    mov h6, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s6, s7, s6
+; CHECK-SD-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s7, s16, s7
+; CHECK-SD-NOFP16-NEXT:    mov h16, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fdiv s3, s17, s16
+; CHECK-SD-NOFP16-NEXT:    fdiv s0, s0, s1
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s6
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s3
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v0.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fdiv_v8f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    fdiv v0.8h, v0.8h, v1.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fdiv_v8f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fdiv v2.4s, v2.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fdiv v1.4s, v0.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fdiv_v8f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fdiv v0.8h, v0.8h, v1.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = fdiv <8 x half> %a, %b
+  ret <8 x half> %c
+}
+
+define <16 x half> @fdiv_v16f16(<16 x half> %a, <16 x half> %b) {
+; CHECK-SD-NOFP16-LABEL: fdiv_v16f16:
+; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    mov h4, v2.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[1]
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h0
+; CHECK-SD-NOFP16-NEXT:    mov h7, v0.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h16, v0.h[3]
+; CHECK-SD-NOFP16-NEXT:    mov h17, v0.h[4]
+; CHECK-SD-NOFP16-NEXT:    mov h18, v0.h[5]
+; CHECK-SD-NOFP16-NEXT:    mov h19, v0.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h0, v0.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s20, h1
+; CHECK-SD-NOFP16-NEXT:    mov h21, v1.h[2]
+; CHECK-SD-NOFP16-NEXT:    mov h22, v1.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h5
+; CHECK-SD-NOFP16-NEXT:    mov h23, v1.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-SD-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt s19, h19
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s21, h21
+; CHECK-SD-NOFP16-NEXT:    fcvt s22, h22
+; CHECK-SD-NOFP16-NEXT:    mov h24, v1.h[5]
+; CHECK-SD-NOFP16-NEXT:    fdiv s4, s5, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s5, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s23, h23
+; CHECK-SD-NOFP16-NEXT:    mov h25, v1.h[6]
+; CHECK-SD-NOFP16-NEXT:    fcvt s24, h24
+; CHECK-SD-NOFP16-NEXT:    fcvt s25, h25
+; CHECK-SD-NOFP16-NEXT:    fdiv s5, s6, s5
+; CHECK-SD-NOFP16-NEXT:    mov h6, v2.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s4
+; CHECK-SD-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-SD-NOFP16-NEXT:    fdiv s6, s7, s6
+; CHECK-SD-NOFP16-NEXT:    mov h7, v2.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-SD-NOFP16-NEXT:    fdiv s7, s16, s7
+; CHECK-SD-NOFP16-NEXT:    mov h16, v2.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s16, h16
+; CHECK-SD-NOFP16-NEXT:    fdiv s16, s17, s16
+; CHECK-SD-NOFP16-NEXT:    mov h17, v2.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-SD-NOFP16-NEXT:    fdiv s17, s18, s17
+; CHECK-SD-NOFP16-NEXT:    mov h18, v2.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v2.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s18, h18
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fdiv s18, s19, s18
+; CHECK-SD-NOFP16-NEXT:    fdiv s19, s0, s2
+; CHECK-SD-NOFP16-NEXT:    mov h0, v3.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h2, v1.h[1]
+; CHECK-SD-NOFP16-NEXT:    mov h1, v1.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-SD-NOFP16-NEXT:    fdiv s2, s2, s0
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h3
+; CHECK-SD-NOFP16-NEXT:    fdiv s20, s20, s0
+; CHECK-SD-NOFP16-NEXT:    mov h0, v3.h[2]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s21, s21, s0
+; CHECK-SD-NOFP16-NEXT:    mov h0, v3.h[3]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s22, s22, s0
+; CHECK-SD-NOFP16-NEXT:    mov h0, v3.h[4]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s23, s23, s0
+; CHECK-SD-NOFP16-NEXT:    mov h0, v3.h[5]
+; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    fdiv s24, s24, s0
+; CHECK-SD-NOFP16-NEXT:    mov h0, v3.h[6]
+; CHECK-SD-NOFP16-NEXT:    mov h3, v3.h[7]
+; CHECK-SD-NOFP16-NEXT:    fcvt s26, h0
+; CHECK-SD-NOFP16-NEXT:    fcvt h0, s5
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s2
+; CHECK-SD-NOFP16-NEXT:    fcvt h2, s20
+; CHECK-SD-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s6
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[1], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s21
+; CHECK-SD-NOFP16-NEXT:    fdiv s20, s25, s26
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s7
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[2], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h5, s22
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s23
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[3], v5.h[0]
+; CHECK-SD-NOFP16-NEXT:    fdiv s1, s1, s3
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s16
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[4], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s24
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[4], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s17
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[5], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h4, s20
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[5], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s18
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[6], v4.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[6], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    fcvt h3, s19
+; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
+; CHECK-SD-NOFP16-NEXT:    mov v0.h[7], v3.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v2.h[7], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    mov v1.16b, v2.16b
+; CHECK-SD-NOFP16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fdiv_v16f16:
+; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    fdiv v1.8h, v1.8h, v3.8h
+; CHECK-SD-FP16-NEXT:    fdiv v0.8h, v0.8h, v2.8h
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NOFP16-LABEL: fdiv_v16f16:
+; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fdiv v4.4s, v4.4s, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fdiv v2.4s, v0.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fdiv v5.4s, v0.4s, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v3.8h
+; CHECK-GI-NOFP16-NEXT:    fdiv v3.4s, v0.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v4.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v3.4s
+; CHECK-GI-NOFP16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fdiv_v16f16:
+; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    fdiv v0.8h, v0.8h, v2.8h
+; CHECK-GI-FP16-NEXT:    fdiv v1.8h, v1.8h, v3.8h
+; CHECK-GI-FP16-NEXT:    ret
+entry:
+  %c = fdiv <16 x half> %a, %b
+  ret <16 x half> %c
+}


        


More information about the llvm-commits mailing list