[llvm] 5e1c2bf - [AArch64][GlobalISel] Expand coverage of FMA.

David Green via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 14 05:24:35 PDT 2023


Author: David Green
Date: 2023-10-14T13:24:28+01:00
New Revision: 5e1c2bf3e6fca35ee0445b2a81d47e8576024186

URL: https://github.com/llvm/llvm-project/commit/5e1c2bf3e6fca35ee0445b2a81d47e8576024186
DIFF: https://github.com/llvm/llvm-project/commit/5e1c2bf3e6fca35ee0445b2a81d47e8576024186.diff

LOG: [AArch64][GlobalISel] Expand coverage of FMA.

This moves the legalization of G_FMA to the action builder that can handle more
types. The existing arm64-vfloatintrinsics.ll test file has been removed, as its
cases are covered in other test files.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
    llvm/test/CodeGen/AArch64/fmla.ll

Removed: 
    llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 378a8d0da4925d9..d2f855f4075308e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -229,10 +229,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
        .clampScalar(1, s32, s64)
       .widenScalarToNextPow2(0);
 
-  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG, G_FABS,
-                               G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
-                               G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
-                               G_FNEARBYINT, G_INTRINSIC_TRUNC,
+  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
+                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
+                               G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
+                               G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
                                G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
       .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
       .legalIf([=](const LegalityQuery &Query) {
@@ -251,7 +251,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .minScalar(0, s32)
       .scalarize(0);
 
-  getActionDefinitionsBuilder({G_FMA, G_INTRINSIC_LRINT})
+  getActionDefinitionsBuilder(G_INTRINSIC_LRINT)
       // If we don't have full FP16 support, then scalarize the elements of
       // vectors containing fp16 types.
       .fewerElementsIf(

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir
index 3388ab97dc3352a..d344511010b21df 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fma.mir
@@ -13,43 +13,27 @@ body:             |
 
     ; NO-FP16-LABEL: name: test_v4f16.fma
     ; NO-FP16: liveins: $d0, $d1, $d2
-    ; NO-FP16: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
-    ; NO-FP16: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
-    ; NO-FP16: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2
-    ; NO-FP16: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; NO-FP16: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; NO-FP16: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
-    ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
-    ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
-    ; NO-FP16: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16)
-    ; NO-FP16: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]]
-    ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32)
-    ; NO-FP16: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
-    ; NO-FP16: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
-    ; NO-FP16: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16)
-    ; NO-FP16: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]]
-    ; NO-FP16: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32)
-    ; NO-FP16: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
-    ; NO-FP16: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
-    ; NO-FP16: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16)
-    ; NO-FP16: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]]
-    ; NO-FP16: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32)
-    ; NO-FP16: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
-    ; NO-FP16: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
-    ; NO-FP16: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16)
-    ; NO-FP16: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]]
-    ; NO-FP16: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32)
-    ; NO-FP16: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
-    ; NO-FP16: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
-    ; NO-FP16: RET_ReallyLR implicit $d0
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+    ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2
+    ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY1]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY2]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]]
+    ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FMA]](<4 x s32>)
+    ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; FP16-LABEL: name: test_v4f16.fma
     ; FP16: liveins: $d0, $d1, $d2
-    ; FP16: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
-    ; FP16: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
-    ; FP16: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2
-    ; FP16: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; FP16: $d0 = COPY [[FMA]](<4 x s16>)
-    ; FP16: RET_ReallyLR implicit $d0
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+    ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2
+    ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; FP16-NEXT: $d0 = COPY [[FMA]](<4 x s16>)
+    ; FP16-NEXT: RET_ReallyLR implicit $d0
     %0:_(<4 x s16>) = COPY $d0
     %1:_(<4 x s16>) = COPY $d1
     %2:_(<4 x s16>) = COPY $d2
@@ -69,63 +53,36 @@ body:             |
 
     ; NO-FP16-LABEL: name: test_v8f16.fma
     ; NO-FP16: liveins: $q0, $q1, $q2
-    ; NO-FP16: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
-    ; NO-FP16: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
-    ; NO-FP16: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2
-    ; NO-FP16: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
-    ; NO-FP16: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<8 x s16>)
-    ; NO-FP16: [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<8 x s16>)
-    ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
-    ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16)
-    ; NO-FP16: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV16]](s16)
-    ; NO-FP16: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]]
-    ; NO-FP16: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32)
-    ; NO-FP16: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
-    ; NO-FP16: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16)
-    ; NO-FP16: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV17]](s16)
-    ; NO-FP16: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]]
-    ; NO-FP16: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32)
-    ; NO-FP16: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
-    ; NO-FP16: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16)
-    ; NO-FP16: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV18]](s16)
-    ; NO-FP16: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]]
-    ; NO-FP16: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32)
-    ; NO-FP16: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
-    ; NO-FP16: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16)
-    ; NO-FP16: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV19]](s16)
-    ; NO-FP16: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]]
-    ; NO-FP16: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32)
-    ; NO-FP16: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
-    ; NO-FP16: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[UV12]](s16)
-    ; NO-FP16: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[UV20]](s16)
-    ; NO-FP16: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FPEXT12]], [[FPEXT13]], [[FPEXT14]]
-    ; NO-FP16: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA4]](s32)
-    ; NO-FP16: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
-    ; NO-FP16: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[UV13]](s16)
-    ; NO-FP16: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[UV21]](s16)
-    ; NO-FP16: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FPEXT15]], [[FPEXT16]], [[FPEXT17]]
-    ; NO-FP16: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA5]](s32)
-    ; NO-FP16: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
-    ; NO-FP16: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[UV14]](s16)
-    ; NO-FP16: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[UV22]](s16)
-    ; NO-FP16: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FPEXT18]], [[FPEXT19]], [[FPEXT20]]
-    ; NO-FP16: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA6]](s32)
-    ; NO-FP16: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
-    ; NO-FP16: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[UV15]](s16)
-    ; NO-FP16: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[UV23]](s16)
-    ; NO-FP16: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FPEXT21]], [[FPEXT22]], [[FPEXT23]]
-    ; NO-FP16: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA7]](s32)
-    ; NO-FP16: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
-    ; NO-FP16: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
-    ; NO-FP16: RET_ReallyLR implicit $q0
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+    ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2
+    ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+    ; NO-FP16-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY1]](<8 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV2]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV3]](<4 x s16>)
+    ; NO-FP16-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY2]](<8 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV4]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV5]](<4 x s16>)
+    ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[FPEXT]], [[FPEXT2]], [[FPEXT4]]
+    ; NO-FP16-NEXT: [[FMA1:%[0-9]+]]:_(<4 x s32>) = G_FMA [[FPEXT1]], [[FPEXT3]], [[FPEXT5]]
+    ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FMA]](<4 x s32>)
+    ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FMA1]](<4 x s32>)
+    ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+    ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    ;
     ; FP16-LABEL: name: test_v8f16.fma
     ; FP16: liveins: $q0, $q1, $q2
-    ; FP16: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
-    ; FP16: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
-    ; FP16: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2
-    ; FP16: [[FMA:%[0-9]+]]:_(<8 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; FP16: $q0 = COPY [[FMA]](<8 x s16>)
-    ; FP16: RET_ReallyLR implicit $q0
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+    ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<8 x s16>) = COPY $q2
+    ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<8 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; FP16-NEXT: $q0 = COPY [[FMA]](<8 x s16>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
     %0:_(<8 x s16>) = COPY $q0
     %1:_(<8 x s16>) = COPY $q1
     %2:_(<8 x s16>) = COPY $q2
@@ -145,20 +102,23 @@ body:             |
 
     ; NO-FP16-LABEL: name: test_v2f32.fma
     ; NO-FP16: liveins: $d0, $d1, $d2
-    ; NO-FP16: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-    ; NO-FP16: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
-    ; NO-FP16: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2
-    ; NO-FP16: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; NO-FP16: $d0 = COPY [[FMA]](<2 x s32>)
-    ; NO-FP16: RET_ReallyLR implicit $d0
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2
+    ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; NO-FP16-NEXT: $d0 = COPY [[FMA]](<2 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; FP16-LABEL: name: test_v2f32.fma
     ; FP16: liveins: $d0, $d1, $d2
-    ; FP16: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-    ; FP16: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
-    ; FP16: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2
-    ; FP16: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; FP16: $d0 = COPY [[FMA]](<2 x s32>)
-    ; FP16: RET_ReallyLR implicit $d0
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2
+    ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; FP16-NEXT: $d0 = COPY [[FMA]](<2 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = COPY $d1
     %2:_(<2 x s32>) = COPY $d2
@@ -178,20 +138,23 @@ body:             |
 
     ; NO-FP16-LABEL: name: test_v4f32.fma
     ; NO-FP16: liveins: $q0, $q1, $q2
-    ; NO-FP16: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; NO-FP16: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; NO-FP16: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
-    ; NO-FP16: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; NO-FP16: $q0 = COPY [[FMA]](<4 x s32>)
-    ; NO-FP16: RET_ReallyLR implicit $q0
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; NO-FP16-NEXT: $q0 = COPY [[FMA]](<4 x s32>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    ;
     ; FP16-LABEL: name: test_v4f32.fma
     ; FP16: liveins: $q0, $q1, $q2
-    ; FP16: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; FP16: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; FP16: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
-    ; FP16: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; FP16: $q0 = COPY [[FMA]](<4 x s32>)
-    ; FP16: RET_ReallyLR implicit $q0
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; FP16-NEXT: $q0 = COPY [[FMA]](<4 x s32>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
     %2:_(<4 x s32>) = COPY $q2
@@ -211,20 +174,23 @@ body:             |
 
     ; NO-FP16-LABEL: name: test_v2f64.fma
     ; NO-FP16: liveins: $q0, $q1, $q2
-    ; NO-FP16: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-    ; NO-FP16: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-    ; NO-FP16: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-    ; NO-FP16: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; NO-FP16: $q0 = COPY [[FMA]](<2 x s64>)
-    ; NO-FP16: RET_ReallyLR implicit $q0
+    ; NO-FP16-NEXT: {{  $}}
+    ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; NO-FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+    ; NO-FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; NO-FP16-NEXT: $q0 = COPY [[FMA]](<2 x s64>)
+    ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+    ;
     ; FP16-LABEL: name: test_v2f64.fma
     ; FP16: liveins: $q0, $q1, $q2
-    ; FP16: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-    ; FP16: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-    ; FP16: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-    ; FP16: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
-    ; FP16: $q0 = COPY [[FMA]](<2 x s64>)
-    ; FP16: RET_ReallyLR implicit $q0
+    ; FP16-NEXT: {{  $}}
+    ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; FP16-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+    ; FP16-NEXT: [[FMA:%[0-9]+]]:_(<2 x s64>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; FP16-NEXT: $q0 = COPY [[FMA]](<2 x s64>)
+    ; FP16-NEXT: RET_ReallyLR implicit $q0
     %0:_(<2 x s64>) = COPY $q0
     %1:_(<2 x s64>) = COPY $q1
     %2:_(<2 x s64>) = COPY $q2

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index bb915153c53a147..70114f83e8dd602 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -154,7 +154,6 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_INTRINSIC_LRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
@@ -442,6 +441,7 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FMA (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FMAD (opcode {{[0-9]+}}): 1 type index, 0 imm indices

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
deleted file mode 100644
index 0278128b25b62b6..000000000000000
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ /dev/null
@@ -1,514 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP16
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
-
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \
-; RUN:     -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
-; RUN:     2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-NOFP16,FALLBACK
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \
-; RUN:     -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
-; RUN:     2>&1 | FileCheck %s --check-prefixes=GISEL,GISEL-FP16,FALLBACK
-
-;;; Half vectors
-
-%v4f16 = type <4 x half>
-
-define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.powi:
-  ; CHECK-COUNT-4: bl __powi
-  %1 = call %v4f16 @llvm.powi.v4f16.i32(%v4f16 %a, i32 %b)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.sin
-define %v4f16 @test_v4f16.sin(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.sin:
-  ; CHECK-COUNT-4: bl sinf
-  ; GISEL-LABEL:   test_v4f16.sin:
-  ; GISEL-COUNT-4: bl sinf
-  %1 = call %v4f16 @llvm.sin.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.cos
-define %v4f16 @test_v4f16.cos(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.cos:
-  ; CHECK-COUNT-4: bl cosf
-  ; GISEL-LABEL:   test_v4f16.cos:
-  ; GISEL-COUNT-4: bl cosf
-  %1 = call %v4f16 @llvm.cos.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.exp
-define %v4f16 @test_v4f16.exp(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.exp:
-  ; CHECK-COUNT-4: bl exp
-  ; GISEL-LABEL:   test_v4f16.exp:
-  ; GISEL-COUNT-4: bl exp
-  %1 = call %v4f16 @llvm.exp.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-define %v4f16 @test_v4f16.exp2(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.exp2:
-  ; CHECK-COUNT-4: bl exp2
-  %1 = call %v4f16 @llvm.exp2.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.log
-define %v4f16 @test_v4f16.log(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.log:
-  ; CHECK-COUNT-4: bl log
-  ; GISEL-LABEL:   test_v4f16.log:
-  ; GISEL-COUNT-4: bl log
-  %1 = call %v4f16 @llvm.log.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.log10
-define %v4f16 @test_v4f16.log10(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.log10:
-  ; CHECK-COUNT-4: bl log10
-  ; GISEL-LABEL:   test_v4f16.log10:
-  ; GISEL-COUNT-4: bl log10
-  %1 = call %v4f16 @llvm.log10.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.log2
-define %v4f16 @test_v4f16.log2(%v4f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v4f16.log2:
-  ; CHECK-COUNT-4: bl log2
-  ; GISEL-LABEL:   test_v4f16.log2:
-  ; GISEL-COUNT-4: bl log2
-  %1 = call %v4f16 @llvm.log2.v4f16(%v4f16 %a)
-  ret %v4f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f16.fma
-define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
-  ; CHECK-LABEL:          test_v4f16.fma:
-  ; CHECK-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           fmla.4h
-  ; GISEL-LABEL:          test_v4f16.fma:
-  ; GISEL-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           fmla.4h
-  %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c)
-  ret %v4f16 %1
-}
-
-declare %v4f16 @llvm.powi.v4f16.i32(%v4f16, i32) #0
-declare %v4f16 @llvm.sin.v4f16(%v4f16) #0
-declare %v4f16 @llvm.cos.v4f16(%v4f16) #0
-declare %v4f16 @llvm.exp.v4f16(%v4f16) #0
-declare %v4f16 @llvm.exp2.v4f16(%v4f16) #0
-declare %v4f16 @llvm.log.v4f16(%v4f16) #0
-declare %v4f16 @llvm.log10.v4f16(%v4f16) #0
-declare %v4f16 @llvm.log2.v4f16(%v4f16) #0
-declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
-
-;;;
-
-%v8f16 = type <8 x half>
-
-define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.powi:
-  ; CHECK-COUNT-8: bl __powi
-  ; GISEL-LABEL:   test_v8f16.powi:
-  ; GISEL-COUNT-8: bl __powi
-  %1 = call %v8f16 @llvm.powi.v8f16.i32(%v8f16 %a, i32 %b)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.sin
-define %v8f16 @test_v8f16.sin(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.sin:
-  ; CHECK-COUNT-8: bl sinf
-  ; GISEL-LABEL:   test_v8f16.sin:
-  ; GISEL-COUNT-8: bl sinf
-  %1 = call %v8f16 @llvm.sin.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.cos
-define %v8f16 @test_v8f16.cos(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.cos:
-  ; CHECK-COUNT-8: bl cosf
-  ; GISEL-LABEL:   test_v8f16.cos:
-  ; GISEL-COUNT-8: bl cosf
-  %1 = call %v8f16 @llvm.cos.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.exp
-define %v8f16 @test_v8f16.exp(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.exp:
-  ; CHECK-COUNT-8: bl exp
-  ; GISEL-LABEL:   test_v8f16.exp:
-  ; GISEL-COUNT-8: bl exp
-  %1 = call %v8f16 @llvm.exp.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-define %v8f16 @test_v8f16.exp2(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.exp2:
-  ; CHECK-COUNT-8: bl exp2
-  %1 = call %v8f16 @llvm.exp2.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.log
-define %v8f16 @test_v8f16.log(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.log:
-  ; CHECK-COUNT-8: bl log
-  ; GISEL-LABEL:   test_v8f16.log:
-  ; GISEL-COUNT-8: bl log
-  %1 = call %v8f16 @llvm.log.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.log10
-define %v8f16 @test_v8f16.log10(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.log10:
-  ; CHECK-COUNT-8: bl log10
-  ; GISEL-LABEL:   test_v8f16.log10:
-  ; GISEL-COUNT-8: bl log10
-  %1 = call %v8f16 @llvm.log10.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.log2
-define %v8f16 @test_v8f16.log2(%v8f16 %a) {
-  ; This operation is expanded, whether with or without +fullfp16.
-  ; CHECK-LABEL:   test_v8f16.log2:
-  ; CHECK-COUNT-8: bl log2
-  ; GISEL-LABEL:   test_v8f16.log2:
-  ; GISEL-COUNT-8: bl log2
-  %1 = call %v8f16 @llvm.log2.v8f16(%v8f16 %a)
-  ret %v8f16 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v8f16.fma
-define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
-  ; CHECK-LABEL:          test_v8f16.fma:
-  ; CHECK-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-  ; CHECK-FP16-NOT:       fcvt
-  ; CHECK-FP16:           fmla.8h
-  ; GISEL-LABEL:          test_v8f16.fma:
-  ; GISEL-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-  ; GISEL-FP16-NOT:       fcvt
-  ; GISEL-FP16:           fmla.8h
-  %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c)
-  ret %v8f16 %1
-}
-
-declare %v8f16 @llvm.powi.v8f16.i32(%v8f16, i32) #0
-declare %v8f16 @llvm.sin.v8f16(%v8f16) #0
-declare %v8f16 @llvm.cos.v8f16(%v8f16) #0
-declare %v8f16 @llvm.exp.v8f16(%v8f16) #0
-declare %v8f16 @llvm.exp2.v8f16(%v8f16) #0
-declare %v8f16 @llvm.log.v8f16(%v8f16) #0
-declare %v8f16 @llvm.log10.v8f16(%v8f16) #0
-declare %v8f16 @llvm.log2.v8f16(%v8f16) #0
-declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
-
-;;; Float vectors
-
-%v2f32 = type <2 x float>
-
-; CHECK: test_v2f32.powi:
-define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
-  ; CHECK: pow
-  %1 = call %v2f32 @llvm.powi.v2f32.i32(%v2f32 %a, i32 %b)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.sin
-; CHECK: test_v2f32.sin:
-define %v2f32 @test_v2f32.sin(%v2f32 %a) {
-  ; CHECK: sin
-  ; GISEL: sin
-  %1 = call %v2f32 @llvm.sin.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.cos
-; CHECK: test_v2f32.cos:
-define %v2f32 @test_v2f32.cos(%v2f32 %a) {
-  ; CHECK: cos
-  ; GISEL: cos
-  %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.exp
-; CHECK: test_v2f32.exp:
-; GISEL: test_v2f32.exp:
-define %v2f32 @test_v2f32.exp(%v2f32 %a) {
-  ; CHECK: exp
-  ; GISEL: exp
-  %1 = call %v2f32 @llvm.exp.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-; CHECK: test_v2f32.exp2:
-define %v2f32 @test_v2f32.exp2(%v2f32 %a) {
-  ; CHECK: exp
-  %1 = call %v2f32 @llvm.exp2.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.log
-; CHECK: test_v2f32.log:
-define %v2f32 @test_v2f32.log(%v2f32 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v2f32 @llvm.log.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.log10
-; CHECK: test_v2f32.log10:
-; GISEL: test_v2f32.log10:
-define %v2f32 @test_v2f32.log10(%v2f32 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v2f32 @llvm.log10.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.log2
-; CHECK: test_v2f32.log2:
-; GISEL: test_v2f32.log2:
-define %v2f32 @test_v2f32.log2(%v2f32 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a)
-  ret %v2f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f32.fma
-; CHECK-LABEL: test_v2f32.fma:
-; GISEL-LABEL: test_v2f32.fma:
-define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
-  ; CHECK: fmla.2s
-  ; GISEL: fmla.2s
-  %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
-  ret %v2f32 %1
-}
-
-declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
-declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
-declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
-declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
-declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
-declare %v2f32 @llvm.log.v2f32(%v2f32) #0
-declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
-declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
-declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
-
-;;;
-
-%v4f32 = type <4 x float>
-
-; CHECK: test_v4f32.powi:
-define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) {
-  ; CHECK: pow
-  %1 = call %v4f32 @llvm.powi.v4f32.i32(%v4f32 %a, i32 %b)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.sin
-; CHECK: test_v4f32.sin:
-define %v4f32 @test_v4f32.sin(%v4f32 %a) {
-  ; CHECK: sin
-  ; GISEL: sin
-  %1 = call %v4f32 @llvm.sin.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.cos
-; CHECK: test_v4f32.cos:
-define %v4f32 @test_v4f32.cos(%v4f32 %a) {
-  ; CHECK: cos
-  ; GISEL: cos
-  %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.exp
-; CHECK: test_v4f32.exp:
-; GISEL: test_v4f32.exp:
-define %v4f32 @test_v4f32.exp(%v4f32 %a) {
-  ; CHECK: exp
-  ; GISEL: exp
-  %1 = call %v4f32 @llvm.exp.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-; CHECK: test_v4f32.exp2:
-define %v4f32 @test_v4f32.exp2(%v4f32 %a) {
-  ; CHECK: exp
-  %1 = call %v4f32 @llvm.exp2.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.log
-; CHECK: test_v4f32.log:
-define %v4f32 @test_v4f32.log(%v4f32 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v4f32 @llvm.log.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.log10
-; CHECK: test_v4f32.log10:
-define %v4f32 @test_v4f32.log10(%v4f32 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v4f32 @llvm.log10.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.log2
-; CHECK: test_v4f32.log2:
-; GISEL: test_v4f32.log2:
-define %v4f32 @test_v4f32.log2(%v4f32 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v4f32 @llvm.log2.v4f32(%v4f32 %a)
-  ret %v4f32 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v4f32.fma
-; CHECK: test_v4f32.fma:
-; GISEL: test_v4f32.fma:
-define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
-  ; CHECK: fma
-  ; GISEL: fma
-  %1 = call %v4f32 @llvm.fma.v4f32(%v4f32 %a, %v4f32 %b, %v4f32 %c)
-  ret %v4f32 %1
-}
-
-declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
-declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
-declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
-declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
-declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
-declare %v4f32 @llvm.log.v4f32(%v4f32) #0
-declare %v4f32 @llvm.log10.v4f32(%v4f32) #0
-declare %v4f32 @llvm.log2.v4f32(%v4f32) #0
-declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0
-
-;;; Double vector
-
-%v2f64 = type <2 x double>
-
-; CHECK: test_v2f64.powi:
-define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) {
-  ; CHECK: pow
-  %1 = call %v2f64 @llvm.powi.v2f64.i32(%v2f64 %a, i32 %b)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.sin
-; CHECK: test_v2f64.sin:
-define %v2f64 @test_v2f64.sin(%v2f64 %a) {
-  ; CHECK: sin
-  ; GISEL: sin
-  %1 = call %v2f64 @llvm.sin.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.cos
-; CHECK: test_v2f64.cos:
-define %v2f64 @test_v2f64.cos(%v2f64 %a) {
-  ; CHECK: cos
-  ; GISEL: cos
-  %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.exp
-; CHECK: test_v2f64.exp:
-; GISEL: test_v2f64.exp:
-define %v2f64 @test_v2f64.exp(%v2f64 %a) {
-  ; CHECK: exp
-  ; GISEL: exp
-  %1 = call %v2f64 @llvm.exp.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-; CHECK: test_v2f64.exp2:
-define %v2f64 @test_v2f64.exp2(%v2f64 %a) {
-  ; CHECK: exp
-  %1 = call %v2f64 @llvm.exp2.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.log
-; CHECK: test_v2f64.log:
-define %v2f64 @test_v2f64.log(%v2f64 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v2f64 @llvm.log.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.log10
-; CHECK: test_v2f64.log10:
-; GISEL: test_v2f64.log10:
-define %v2f64 @test_v2f64.log10(%v2f64 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v2f64 @llvm.log10.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.log2
-; CHECK: test_v2f64.log2:
-; GISEL: test_v2f64.log2:
-define %v2f64 @test_v2f64.log2(%v2f64 %a) {
-  ; CHECK: log
-  ; GISEL: log
-  %1 = call %v2f64 @llvm.log2.v2f64(%v2f64 %a)
-  ret %v2f64 %1
-}
-
-; FALLBACK-NOT: remark{{.*}}test_v2f64.fma
-; CHECK: test_v2f64.fma:
-; GISEL: test_v2f64.fma:
-define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
-  ; CHECK: fma
-  ; GISEL: fma
-  %1 = call %v2f64 @llvm.fma.v2f64(%v2f64 %a, %v2f64 %b, %v2f64 %c)
-  ret %v2f64 %1
-}
-
-declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
-declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
-declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
-declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
-declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
-declare %v2f64 @llvm.log.v2f64(%v2f64) #0
-declare %v2f64 @llvm.log10.v2f64(%v2f64) #0
-declare %v2f64 @llvm.log2.v2f64(%v2f64) #0
-declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0
-
-attributes #0 = { nounwind readonly }

diff  --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll
index a1782f8e9087cdf..3ae2158a1886899 100644
--- a/llvm/test/CodeGen/AArch64/fmla.ll
+++ b/llvm/test/CodeGen/AArch64/fmla.ll
@@ -1,21 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
 ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-
-; CHECK-GI:       warning: Instruction selection used fallback path for fma_v3f64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_v4f64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_v3f32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_v8f32
-; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fma_v7f16
-; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fma_v16f16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fmuladd_v3f64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fmuladd_v4f64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fmuladd_v3f32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fmuladd_v8f32
-; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fmuladd_v7f16
-; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for fmuladd_v16f16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 define double @fma_f64(double %a, double %b, double %c) {
 ; CHECK-LABEL: fma_f64:
@@ -82,27 +69,45 @@ entry:
 }
 
 define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
-; CHECK-LABEL: fma_v3f64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-NEXT:    fmla v6.2d, v3.2d, v0.2d
-; CHECK-NEXT:    ldr d3, [sp]
-; CHECK-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-NEXT:    fmov d0, d6
-; CHECK-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    fmov d2, d3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fma_v3f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
+; CHECK-SD-NEXT:    ldr d3, [sp]
+; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
+; CHECK-SD-NEXT:    fmov d0, d6
+; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d2, d3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fma_v3f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-NEXT:    fmla v6.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    ldr d0, [sp]
+; CHECK-GI-NEXT:    fmadd d2, d2, d5, d0
+; CHECK-GI-NEXT:    mov d1, v6.d[1]
+; CHECK-GI-NEXT:    fmov d0, d6
+; CHECK-GI-NEXT:    ret
 entry:
   %d = call <3 x double> @llvm.fma.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
   ret <3 x double> %d
@@ -249,67 +254,46 @@ define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fma_v7f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h2
-; CHECK-GI-NOFP16-NEXT:    mov h20, v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h21, v1.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h22, v2.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT:    fmadd s17, s17, s18, s19
-; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h19, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmadd s4, s3, s4, s5
-; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmadd s6, s6, s7, s16
-; CHECK-GI-NOFP16-NEXT:    fcvt h3, s17
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h18
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h19
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h22
-; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    mov h20, v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h21, v1.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h22, v2.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
+; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v2.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h16, v2.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v18.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v19.4s, v2.4h
 ; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[6]
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v2.h[6]
-; CHECK-GI-NOFP16-NEXT:    fmadd s5, s5, s7, s16
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmadd s4, s17, s18, s19
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h22
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT:    fmadd s6, s7, s16, s17
-; CHECK-GI-NOFP16-NEXT:    fmadd s0, s0, s1, s2
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v5.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v4.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[1], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[1], v16.h[0]
+; CHECK-GI-NOFP16-NEXT:    fmla v19.4s, v18.4s, v17.4s
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[2], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[2], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v19.4s
+; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v4.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v5.4h
+; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    fmla v4.4s, v3.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v5.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v4.4s
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v0.h[7], v0.h[0]
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fma_v7f16:
@@ -371,42 +355,11 @@ define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fma_v4f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h2
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT:    fmadd s17, s17, s18, s19
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fmadd s3, s3, s4, s5
-; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fmadd s5, s6, s7, s16
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s17
-; CHECK-GI-NOFP16-NEXT:    fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT:    fmadd s1, s4, s1, s2
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v5.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fmla v2.4s, v1.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v2.4s
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fma_v4f16:
@@ -501,75 +454,16 @@ define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fma_v8f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h2
-; CHECK-GI-NOFP16-NEXT:    mov h20, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h21, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h22, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT:    fmadd s17, s17, s18, s19
-; CHECK-GI-NOFP16-NEXT:    mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h22
-; CHECK-GI-NOFP16-NEXT:    mov h22, v2.h[5]
-; CHECK-GI-NOFP16-NEXT:    fmadd s4, s3, s4, s5
-; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    fmadd s6, s6, s7, s16
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h21
-; CHECK-GI-NOFP16-NEXT:    mov h20, v2.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt h3, s17
-; CHECK-GI-NOFP16-NEXT:    mov h17, v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h21, v1.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fmadd s7, s7, s16, s19
-; CHECK-GI-NOFP16-NEXT:    fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h22
-; CHECK-GI-NOFP16-NEXT:    mov h20, v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h22, v2.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v2.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h17
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h21
-; CHECK-GI-NOFP16-NEXT:    mov h21, v1.h[6]
-; CHECK-GI-NOFP16-NEXT:    fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT:    fmadd s5, s5, s16, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fmadd s4, s4, s17, s19
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h22
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[3], v7.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT:    fmadd s6, s6, s16, s17
-; CHECK-GI-NOFP16-NEXT:    fmadd s0, s0, s1, s2
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[5], v4.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[6], v5.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v3.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v4.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v5.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fmla v5.4s, v4.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT:    fmla v2.4s, v1.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v5.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fma_v8f16:
@@ -735,148 +629,26 @@ define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fma_v16f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GI-NOFP16-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-GI-NOFP16-NEXT:    .cfi_offset b8, -16
-; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h0
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h4
-; CHECK-GI-NOFP16-NEXT:    mov h19, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h20, v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h21, v4.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h22, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h23, v2.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h24, v4.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h26, v2.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h27, v4.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    mov h25, v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h28, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    fmadd s16, s16, s17, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h19
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h23
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h24
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h26
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h27
-; CHECK-GI-NOFP16-NEXT:    fcvt s26, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s27, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s29, h5
-; CHECK-GI-NOFP16-NEXT:    mov h31, v2.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h8, v3.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fmadd s17, s6, s17, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt h6, s16
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h28
-; CHECK-GI-NOFP16-NEXT:    fmadd s19, s19, s20, s21
-; CHECK-GI-NOFP16-NEXT:    fmadd s18, s22, s23, s24
-; CHECK-GI-NOFP16-NEXT:    mov h20, v5.h[1]
-; CHECK-GI-NOFP16-NEXT:    fmadd s24, s26, s27, s29
-; CHECK-GI-NOFP16-NEXT:    mov h22, v4.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h21, v3.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h26, v5.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h25
-; CHECK-GI-NOFP16-NEXT:    fcvt s28, h31
-; CHECK-GI-NOFP16-NEXT:    fcvt h29, s17
-; CHECK-GI-NOFP16-NEXT:    fcvt s17, h8
-; CHECK-GI-NOFP16-NEXT:    mov h30, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    mov h23, v3.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h27, v5.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h21
-; CHECK-GI-NOFP16-NEXT:    fcvt s26, h26
-; CHECK-GI-NOFP16-NEXT:    mov h31, v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h8, v1.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt s30, h30
-; CHECK-GI-NOFP16-NEXT:    fcvt h19, s19
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[1], v29.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmadd s20, s16, s17, s20
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h23
-; CHECK-GI-NOFP16-NEXT:    fcvt s27, h27
-; CHECK-GI-NOFP16-NEXT:    fmadd s16, s25, s28, s22
-; CHECK-GI-NOFP16-NEXT:    mov h22, v2.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h25, v4.h[5]
-; CHECK-GI-NOFP16-NEXT:    fmadd s21, s7, s21, s26
-; CHECK-GI-NOFP16-NEXT:    mov h26, v3.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h28, v5.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt h7, s24
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h31
-; CHECK-GI-NOFP16-NEXT:    mov h29, v1.h[5]
-; CHECK-GI-NOFP16-NEXT:    fmadd s17, s30, s23, s27
-; CHECK-GI-NOFP16-NEXT:    fcvt h20, s20
-; CHECK-GI-NOFP16-NEXT:    fcvt s27, h8
-; CHECK-GI-NOFP16-NEXT:    fcvt s22, h22
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h25
-; CHECK-GI-NOFP16-NEXT:    fcvt h18, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt s26, h26
-; CHECK-GI-NOFP16-NEXT:    fcvt s28, h28
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[2], v19.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h21, s21
-; CHECK-GI-NOFP16-NEXT:    mov h23, v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[1], v20.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov h20, v3.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvt h17, s17
-; CHECK-GI-NOFP16-NEXT:    fmadd s22, s24, s22, s25
-; CHECK-GI-NOFP16-NEXT:    mov h24, v5.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h25, v2.h[6]
-; CHECK-GI-NOFP16-NEXT:    fmadd s26, s27, s26, s28
-; CHECK-GI-NOFP16-NEXT:    mov h27, v4.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h28, v3.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[3], v18.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov h18, v5.h[6]
-; CHECK-GI-NOFP16-NEXT:    fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[2], v21.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt s21, h29
-; CHECK-GI-NOFP16-NEXT:    fcvt s20, h20
-; CHECK-GI-NOFP16-NEXT:    fcvt s24, h24
-; CHECK-GI-NOFP16-NEXT:    fcvt s23, h23
-; CHECK-GI-NOFP16-NEXT:    fcvt s25, h25
-; CHECK-GI-NOFP16-NEXT:    fcvt s27, h27
-; CHECK-GI-NOFP16-NEXT:    fcvt s19, h19
-; CHECK-GI-NOFP16-NEXT:    fcvt s28, h28
-; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov h2, v2.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[3], v17.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h17, s26
-; CHECK-GI-NOFP16-NEXT:    mov h4, v4.h[7]
-; CHECK-GI-NOFP16-NEXT:    fmadd s20, s21, s20, s24
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v3.h[7]
-; CHECK-GI-NOFP16-NEXT:    fmadd s21, s23, s25, s27
-; CHECK-GI-NOFP16-NEXT:    mov h5, v5.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmadd s18, s19, s28, s18
-; CHECK-GI-NOFP16-NEXT:    fcvt h16, s22
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[4], v17.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt h17, s20
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[5], v16.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmadd s0, s0, s2, s4
-; CHECK-GI-NOFP16-NEXT:    fcvt h2, s21
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[5], v17.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmadd s1, s1, s3, s5
-; CHECK-GI-NOFP16-NEXT:    fcvt h3, s18
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[6], v2.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT:    mov v6.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v7.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v6.16b
-; CHECK-GI-NOFP16-NEXT:    mov v1.16b, v7.16b
-; CHECK-GI-NOFP16-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NOFP16-NEXT:    fcvtl v6.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v7.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v16.4s, v2.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v17.4s, v3.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v18.4s, v4.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v19.4s, v5.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v4.4s, v4.8h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v5.4s, v5.8h
+; CHECK-GI-NOFP16-NEXT:    fmla v18.4s, v16.4s, v6.4s
+; CHECK-GI-NOFP16-NEXT:    fmla v19.4s, v17.4s, v7.4s
+; CHECK-GI-NOFP16-NEXT:    fmla v4.4s, v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT:    fmla v5.4s, v3.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v18.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v19.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v5.4s
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fma_v16f16:
@@ -962,27 +734,45 @@ entry:
 }
 
 define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
-; CHECK-LABEL: fmuladd_v3f64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-NEXT:    fmla v6.2d, v3.2d, v0.2d
-; CHECK-NEXT:    ldr d3, [sp]
-; CHECK-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-NEXT:    fmov d0, d6
-; CHECK-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    fmov d2, d3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fmuladd_v3f64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
+; CHECK-SD-NEXT:    ldr d3, [sp]
+; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
+; CHECK-SD-NEXT:    fmov d0, d6
+; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    fmov d2, d3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fmuladd_v3f64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-NEXT:    fmla v6.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    ldr d0, [sp]
+; CHECK-GI-NEXT:    fmadd d2, d2, d5, d0
+; CHECK-GI-NEXT:    mov d1, v6.d[1]
+; CHECK-GI-NEXT:    fmov d0, d6
+; CHECK-GI-NEXT:    ret
 entry:
   %d = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
   ret <3 x double> %d


        


More information about the llvm-commits mailing list